4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "sysemu/sysemu.h"
31 #include "exec/gdbstub.h"
32 #include "sysemu/dma.h"
33 #include "sysemu/kvm.h"
34 #include "qmp-commands.h"
36 #include "qemu/thread.h"
37 #include "sysemu/cpus.h"
38 #include "sysemu/qtest.h"
39 #include "qemu/main-loop.h"
40 #include "qemu/bitmap.h"
41 #include "qemu/seqlock.h"
42 #include "qapi-event.h"
46 #include "qemu/compatfd.h"
51 #include <sys/prctl.h>
54 #define PR_MCE_KILL 33
57 #ifndef PR_MCE_KILL_SET
58 #define PR_MCE_KILL_SET 1
61 #ifndef PR_MCE_KILL_EARLY
62 #define PR_MCE_KILL_EARLY 1
65 #endif /* CONFIG_LINUX */
67 static CPUState
*next_cpu
;
71 bool cpu_is_stopped(CPUState
*cpu
)
73 return cpu
->stopped
|| !runstate_is_running();
76 static bool cpu_thread_is_idle(CPUState
*cpu
)
78 if (cpu
->stop
|| cpu
->queued_work_first
) {
81 if (cpu_is_stopped(cpu
)) {
84 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
85 kvm_halt_in_kernel()) {
91 static bool all_cpu_threads_idle(void)
96 if (!cpu_thread_is_idle(cpu
)) {
103 /***********************************************************/
104 /* guest cycle counter */
106 /* Protected by TimersState seqlock */
108 static int64_t vm_clock_warp_start
= -1;
109 /* Conversion factor from emulated instructions to virtual clock ticks. */
110 static int icount_time_shift
;
111 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
112 #define MAX_ICOUNT_SHIFT 10
114 static QEMUTimer
*icount_rt_timer
;
115 static QEMUTimer
*icount_vm_timer
;
116 static QEMUTimer
*icount_warp_timer
;
118 typedef struct TimersState
{
119 /* Protected by BQL. */
120 int64_t cpu_ticks_prev
;
121 int64_t cpu_ticks_offset
;
123 /* cpu_clock_offset can be read out of BQL, so protect it with
126 QemuSeqLock vm_clock_seqlock
;
127 int64_t cpu_clock_offset
;
128 int32_t cpu_ticks_enabled
;
131 /* Compensate for varying guest execution speed. */
132 int64_t qemu_icount_bias
;
133 /* Only written by TCG thread */
137 static TimersState timers_state
;
139 int64_t cpu_get_icount_raw(void)
142 CPUState
*cpu
= current_cpu
;
144 icount
= timers_state
.qemu_icount
;
146 if (!cpu_can_do_io(cpu
)) {
147 fprintf(stderr
, "Bad icount read\n");
150 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
155 /* Return the virtual CPU time, based on the instruction counter. */
156 static int64_t cpu_get_icount_locked(void)
158 int64_t icount
= cpu_get_icount_raw();
159 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
162 int64_t cpu_get_icount(void)
168 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
169 icount
= cpu_get_icount_locked();
170 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
175 int64_t cpu_icount_to_ns(int64_t icount
)
177 return icount
<< icount_time_shift
;
180 /* return the host CPU cycle counter and handle stop/restart */
181 /* Caller must hold the BQL */
182 int64_t cpu_get_ticks(void)
187 return cpu_get_icount();
190 ticks
= timers_state
.cpu_ticks_offset
;
191 if (timers_state
.cpu_ticks_enabled
) {
192 ticks
+= cpu_get_real_ticks();
195 if (timers_state
.cpu_ticks_prev
> ticks
) {
196 /* Note: non increasing ticks may happen if the host uses
198 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
199 ticks
= timers_state
.cpu_ticks_prev
;
202 timers_state
.cpu_ticks_prev
= ticks
;
206 static int64_t cpu_get_clock_locked(void)
210 ticks
= timers_state
.cpu_clock_offset
;
211 if (timers_state
.cpu_ticks_enabled
) {
212 ticks
+= get_clock();
218 /* return the host CPU monotonic timer and handle stop/restart */
219 int64_t cpu_get_clock(void)
225 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
226 ti
= cpu_get_clock_locked();
227 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
232 /* enable cpu_get_ticks()
233 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
235 void cpu_enable_ticks(void)
237 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
238 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
239 if (!timers_state
.cpu_ticks_enabled
) {
240 timers_state
.cpu_ticks_offset
-= cpu_get_real_ticks();
241 timers_state
.cpu_clock_offset
-= get_clock();
242 timers_state
.cpu_ticks_enabled
= 1;
244 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
247 /* disable cpu_get_ticks() : the clock is stopped. You must not call
248 * cpu_get_ticks() after that.
249 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
251 void cpu_disable_ticks(void)
253 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
254 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
255 if (timers_state
.cpu_ticks_enabled
) {
256 timers_state
.cpu_ticks_offset
+= cpu_get_real_ticks();
257 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
258 timers_state
.cpu_ticks_enabled
= 0;
260 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
263 /* Correlation between real and virtual time is always going to be
264 fairly approximate, so ignore small variation.
265 When the guest is idle real and virtual time will be aligned in
267 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
269 static void icount_adjust(void)
275 /* Protected by TimersState mutex. */
276 static int64_t last_delta
;
278 /* If the VM is not running, then do nothing. */
279 if (!runstate_is_running()) {
283 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
284 cur_time
= cpu_get_clock_locked();
285 cur_icount
= cpu_get_icount_locked();
287 delta
= cur_icount
- cur_time
;
288 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
290 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
291 && icount_time_shift
> 0) {
292 /* The guest is getting too far ahead. Slow time down. */
296 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
297 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
298 /* The guest is getting too far behind. Speed time up. */
302 timers_state
.qemu_icount_bias
= cur_icount
303 - (timers_state
.qemu_icount
<< icount_time_shift
);
304 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
307 static void icount_adjust_rt(void *opaque
)
309 timer_mod(icount_rt_timer
,
310 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
314 static void icount_adjust_vm(void *opaque
)
316 timer_mod(icount_vm_timer
,
317 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
318 get_ticks_per_sec() / 10);
322 static int64_t qemu_icount_round(int64_t count
)
324 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
327 static void icount_warp_rt(void *opaque
)
329 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
330 * changes from -1 to another value, so the race here is okay.
332 if (atomic_read(&vm_clock_warp_start
) == -1) {
336 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
337 if (runstate_is_running()) {
338 int64_t clock
= cpu_get_clock_locked();
341 warp_delta
= clock
- vm_clock_warp_start
;
342 if (use_icount
== 2) {
344 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
345 * far ahead of real time.
347 int64_t cur_icount
= cpu_get_icount_locked();
348 int64_t delta
= clock
- cur_icount
;
349 warp_delta
= MIN(warp_delta
, delta
);
351 timers_state
.qemu_icount_bias
+= warp_delta
;
353 vm_clock_warp_start
= -1;
354 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
356 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
357 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
361 void qtest_clock_warp(int64_t dest
)
363 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
364 AioContext
*aio_context
;
365 assert(qtest_enabled());
366 aio_context
= qemu_get_aio_context();
367 while (clock
< dest
) {
368 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
369 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
371 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
372 timers_state
.qemu_icount_bias
+= warp
;
373 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
375 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
376 timerlist_run_timers(aio_context
->tlg
.tl
[QEMU_CLOCK_VIRTUAL
]);
377 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
379 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
382 void qemu_clock_warp(QEMUClockType type
)
388 * There are too many global variables to make the "warp" behavior
389 * applicable to other clocks. But a clock argument removes the
390 * need for if statements all over the place.
392 if (type
!= QEMU_CLOCK_VIRTUAL
|| !use_icount
) {
397 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
398 * This ensures that the deadline for the timer is computed correctly below.
399 * This also makes sure that the insn counter is synchronized before the
400 * CPU starts running, in case the CPU is woken by an event other than
401 * the earliest QEMU_CLOCK_VIRTUAL timer.
403 icount_warp_rt(NULL
);
404 timer_del(icount_warp_timer
);
405 if (!all_cpu_threads_idle()) {
409 if (qtest_enabled()) {
410 /* When testing, qtest commands advance icount. */
414 /* We want to use the earliest deadline from ALL vm_clocks */
415 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
416 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
423 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
424 * sleep. Otherwise, the CPU might be waiting for a future timer
425 * interrupt to wake it up, but the interrupt never comes because
426 * the vCPU isn't running any insns and thus doesn't advance the
427 * QEMU_CLOCK_VIRTUAL.
429 * An extreme solution for this problem would be to never let VCPUs
430 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
431 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
432 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
433 * after some "real" time, (related to the time left until the next
434 * event) has passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
435 * This avoids that the warps are visible externally; for example,
436 * you will not be sending network packets continuously instead of
439 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
440 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
441 vm_clock_warp_start
= clock
;
443 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
444 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
445 } else if (deadline
== 0) {
446 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
450 static bool icount_state_needed(void *opaque
)
456 * This is a subsection for icount migration.
458 static const VMStateDescription icount_vmstate_timers
= {
459 .name
= "timer/icount",
461 .minimum_version_id
= 1,
462 .fields
= (VMStateField
[]) {
463 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
464 VMSTATE_INT64(qemu_icount
, TimersState
),
465 VMSTATE_END_OF_LIST()
469 static const VMStateDescription vmstate_timers
= {
472 .minimum_version_id
= 1,
473 .fields
= (VMStateField
[]) {
474 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
475 VMSTATE_INT64(dummy
, TimersState
),
476 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
477 VMSTATE_END_OF_LIST()
479 .subsections
= (VMStateSubsection
[]) {
481 .vmsd
= &icount_vmstate_timers
,
482 .needed
= icount_state_needed
,
489 void cpu_ticks_init(void)
491 seqlock_init(&timers_state
.vm_clock_seqlock
, NULL
);
492 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
495 void configure_icount(QemuOpts
*opts
, Error
**errp
)
498 char *rem_str
= NULL
;
500 option
= qemu_opt_get(opts
, "shift");
502 if (qemu_opt_get(opts
, "align") != NULL
) {
503 error_setg(errp
, "Please specify shift option when using align");
507 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
508 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
509 icount_warp_rt
, NULL
);
510 if (strcmp(option
, "auto") != 0) {
512 icount_time_shift
= strtol(option
, &rem_str
, 0);
513 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
514 error_setg(errp
, "icount: Invalid shift value");
518 } else if (icount_align_option
) {
519 error_setg(errp
, "shift=auto and align=on are incompatible");
524 /* 125MIPS seems a reasonable initial guess at the guest speed.
525 It will be corrected fairly quickly anyway. */
526 icount_time_shift
= 3;
528 /* Have both realtime and virtual time triggers for speed adjustment.
529 The realtime trigger catches emulated time passing too slowly,
530 the virtual time trigger catches emulated time passing too fast.
531 Realtime triggers occur even when idle, so use them less frequently
533 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
534 icount_adjust_rt
, NULL
);
535 timer_mod(icount_rt_timer
,
536 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
537 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
538 icount_adjust_vm
, NULL
);
539 timer_mod(icount_vm_timer
,
540 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
541 get_ticks_per_sec() / 10);
544 /***********************************************************/
545 void hw_error(const char *fmt
, ...)
551 fprintf(stderr
, "qemu: hardware error: ");
552 vfprintf(stderr
, fmt
, ap
);
553 fprintf(stderr
, "\n");
555 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
556 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
562 void cpu_synchronize_all_states(void)
567 cpu_synchronize_state(cpu
);
571 void cpu_synchronize_all_post_reset(void)
576 cpu_synchronize_post_reset(cpu
);
580 void cpu_synchronize_all_post_init(void)
585 cpu_synchronize_post_init(cpu
);
589 void cpu_clean_all_dirty(void)
594 cpu_clean_state(cpu
);
598 static int do_vm_stop(RunState state
)
602 if (runstate_is_running()) {
606 vm_state_notify(0, state
);
607 qapi_event_send_stop(&error_abort
);
611 ret
= bdrv_flush_all();
616 static bool cpu_can_run(CPUState
*cpu
)
621 if (cpu_is_stopped(cpu
)) {
627 static void cpu_handle_guest_debug(CPUState
*cpu
)
629 gdb_set_stop_cpu(cpu
);
630 qemu_system_debug_request();
634 static void cpu_signal(int sig
)
637 cpu_exit(current_cpu
);
643 static void sigbus_reraise(void)
646 struct sigaction action
;
648 memset(&action
, 0, sizeof(action
));
649 action
.sa_handler
= SIG_DFL
;
650 if (!sigaction(SIGBUS
, &action
, NULL
)) {
653 sigaddset(&set
, SIGBUS
);
654 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
656 perror("Failed to re-raise SIGBUS!\n");
660 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
663 if (kvm_on_sigbus(siginfo
->ssi_code
,
664 (void *)(intptr_t)siginfo
->ssi_addr
)) {
669 static void qemu_init_sigbus(void)
671 struct sigaction action
;
673 memset(&action
, 0, sizeof(action
));
674 action
.sa_flags
= SA_SIGINFO
;
675 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
676 sigaction(SIGBUS
, &action
, NULL
);
678 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
681 static void qemu_kvm_eat_signals(CPUState
*cpu
)
683 struct timespec ts
= { 0, 0 };
689 sigemptyset(&waitset
);
690 sigaddset(&waitset
, SIG_IPI
);
691 sigaddset(&waitset
, SIGBUS
);
694 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
695 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
696 perror("sigtimedwait");
702 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
710 r
= sigpending(&chkset
);
712 perror("sigpending");
715 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
718 #else /* !CONFIG_LINUX */
720 static void qemu_init_sigbus(void)
724 static void qemu_kvm_eat_signals(CPUState
*cpu
)
727 #endif /* !CONFIG_LINUX */
730 static void dummy_signal(int sig
)
734 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
738 struct sigaction sigact
;
740 memset(&sigact
, 0, sizeof(sigact
));
741 sigact
.sa_handler
= dummy_signal
;
742 sigaction(SIG_IPI
, &sigact
, NULL
);
744 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
745 sigdelset(&set
, SIG_IPI
);
746 sigdelset(&set
, SIGBUS
);
747 r
= kvm_set_signal_mask(cpu
, &set
);
749 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
754 static void qemu_tcg_init_cpu_signals(void)
757 struct sigaction sigact
;
759 memset(&sigact
, 0, sizeof(sigact
));
760 sigact
.sa_handler
= cpu_signal
;
761 sigaction(SIG_IPI
, &sigact
, NULL
);
764 sigaddset(&set
, SIG_IPI
);
765 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
769 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
774 static void qemu_tcg_init_cpu_signals(void)
779 static QemuMutex qemu_global_mutex
;
780 static QemuCond qemu_io_proceeded_cond
;
781 static bool iothread_requesting_mutex
;
783 static QemuThread io_thread
;
785 static QemuThread
*tcg_cpu_thread
;
786 static QemuCond
*tcg_halt_cond
;
789 static QemuCond qemu_cpu_cond
;
791 static QemuCond qemu_pause_cond
;
792 static QemuCond qemu_work_cond
;
794 void qemu_init_cpu_loop(void)
797 qemu_cond_init(&qemu_cpu_cond
);
798 qemu_cond_init(&qemu_pause_cond
);
799 qemu_cond_init(&qemu_work_cond
);
800 qemu_cond_init(&qemu_io_proceeded_cond
);
801 qemu_mutex_init(&qemu_global_mutex
);
803 qemu_thread_get_self(&io_thread
);
806 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
808 struct qemu_work_item wi
;
810 if (qemu_cpu_is_self(cpu
)) {
818 if (cpu
->queued_work_first
== NULL
) {
819 cpu
->queued_work_first
= &wi
;
821 cpu
->queued_work_last
->next
= &wi
;
823 cpu
->queued_work_last
= &wi
;
829 CPUState
*self_cpu
= current_cpu
;
831 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
832 current_cpu
= self_cpu
;
836 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
838 struct qemu_work_item
*wi
;
840 if (qemu_cpu_is_self(cpu
)) {
845 wi
= g_malloc0(sizeof(struct qemu_work_item
));
849 if (cpu
->queued_work_first
== NULL
) {
850 cpu
->queued_work_first
= wi
;
852 cpu
->queued_work_last
->next
= wi
;
854 cpu
->queued_work_last
= wi
;
861 static void flush_queued_work(CPUState
*cpu
)
863 struct qemu_work_item
*wi
;
865 if (cpu
->queued_work_first
== NULL
) {
869 while ((wi
= cpu
->queued_work_first
)) {
870 cpu
->queued_work_first
= wi
->next
;
877 cpu
->queued_work_last
= NULL
;
878 qemu_cond_broadcast(&qemu_work_cond
);
881 static void qemu_wait_io_event_common(CPUState
*cpu
)
886 qemu_cond_signal(&qemu_pause_cond
);
888 flush_queued_work(cpu
);
889 cpu
->thread_kicked
= false;
892 static void qemu_tcg_wait_io_event(void)
896 while (all_cpu_threads_idle()) {
897 /* Start accounting real time to the virtual clock if the CPUs
899 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
900 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
903 while (iothread_requesting_mutex
) {
904 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
908 qemu_wait_io_event_common(cpu
);
912 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
914 while (cpu_thread_is_idle(cpu
)) {
915 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
918 qemu_kvm_eat_signals(cpu
);
919 qemu_wait_io_event_common(cpu
);
922 static void *qemu_kvm_cpu_thread_fn(void *arg
)
927 qemu_mutex_lock(&qemu_global_mutex
);
928 qemu_thread_get_self(cpu
->thread
);
929 cpu
->thread_id
= qemu_get_thread_id();
933 r
= kvm_init_vcpu(cpu
);
935 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
939 qemu_kvm_init_cpu_signals(cpu
);
941 /* signal CPU creation */
943 qemu_cond_signal(&qemu_cpu_cond
);
946 if (cpu_can_run(cpu
)) {
947 r
= kvm_cpu_exec(cpu
);
948 if (r
== EXCP_DEBUG
) {
949 cpu_handle_guest_debug(cpu
);
952 qemu_kvm_wait_io_event(cpu
);
958 static void *qemu_dummy_cpu_thread_fn(void *arg
)
961 fprintf(stderr
, "qtest is not supported under Windows\n");
968 qemu_mutex_lock_iothread();
969 qemu_thread_get_self(cpu
->thread
);
970 cpu
->thread_id
= qemu_get_thread_id();
973 sigemptyset(&waitset
);
974 sigaddset(&waitset
, SIG_IPI
);
976 /* signal CPU creation */
978 qemu_cond_signal(&qemu_cpu_cond
);
983 qemu_mutex_unlock_iothread();
986 r
= sigwait(&waitset
, &sig
);
987 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
992 qemu_mutex_lock_iothread();
994 qemu_wait_io_event_common(cpu
);
1001 static void tcg_exec_all(void);
1003 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1005 CPUState
*cpu
= arg
;
1007 qemu_tcg_init_cpu_signals();
1008 qemu_thread_get_self(cpu
->thread
);
1010 qemu_mutex_lock(&qemu_global_mutex
);
1012 cpu
->thread_id
= qemu_get_thread_id();
1013 cpu
->created
= true;
1016 qemu_cond_signal(&qemu_cpu_cond
);
1018 /* wait for initial kick-off after machine start */
1019 while (QTAILQ_FIRST(&cpus
)->stopped
) {
1020 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
1022 /* process any pending work */
1024 qemu_wait_io_event_common(cpu
);
1032 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1034 if (deadline
== 0) {
1035 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1038 qemu_tcg_wait_io_event();
1044 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1049 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1051 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1055 if (!qemu_cpu_is_self(cpu
)) {
1058 if (SuspendThread(cpu
->hThread
) == (DWORD
)-1) {
1059 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1064 /* On multi-core systems, we are not sure that the thread is actually
1065 * suspended until we can get the context.
1067 tcgContext
.ContextFlags
= CONTEXT_CONTROL
;
1068 while (GetThreadContext(cpu
->hThread
, &tcgContext
) != 0) {
1074 if (ResumeThread(cpu
->hThread
) == (DWORD
)-1) {
1075 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1083 void qemu_cpu_kick(CPUState
*cpu
)
1085 qemu_cond_broadcast(cpu
->halt_cond
);
1086 if (!tcg_enabled() && !cpu
->thread_kicked
) {
1087 qemu_cpu_kick_thread(cpu
);
1088 cpu
->thread_kicked
= true;
1092 void qemu_cpu_kick_self(void)
1095 assert(current_cpu
);
1097 if (!current_cpu
->thread_kicked
) {
1098 qemu_cpu_kick_thread(current_cpu
);
1099 current_cpu
->thread_kicked
= true;
1106 bool qemu_cpu_is_self(CPUState
*cpu
)
1108 return qemu_thread_is_self(cpu
->thread
);
1111 bool qemu_in_vcpu_thread(void)
1113 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
1116 void qemu_mutex_lock_iothread(void)
1118 if (!tcg_enabled()) {
1119 qemu_mutex_lock(&qemu_global_mutex
);
1121 iothread_requesting_mutex
= true;
1122 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
1123 qemu_cpu_kick_thread(first_cpu
);
1124 qemu_mutex_lock(&qemu_global_mutex
);
1126 iothread_requesting_mutex
= false;
1127 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
1131 void qemu_mutex_unlock_iothread(void)
1133 qemu_mutex_unlock(&qemu_global_mutex
);
1136 static int all_vcpus_paused(void)
1141 if (!cpu
->stopped
) {
1149 void pause_all_vcpus(void)
1153 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1159 if (qemu_in_vcpu_thread()) {
1161 if (!kvm_enabled()) {
1164 cpu
->stopped
= true;
1170 while (!all_vcpus_paused()) {
1171 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1178 void cpu_resume(CPUState
*cpu
)
1181 cpu
->stopped
= false;
1185 void resume_all_vcpus(void)
1189 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1195 /* For temporary buffers for forming a name */
1196 #define VCPU_THREAD_NAME_SIZE 16
1198 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1200 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1202 tcg_cpu_address_space_init(cpu
, cpu
->as
);
1204 /* share a single thread for all cpus with TCG */
1205 if (!tcg_cpu_thread
) {
1206 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1207 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1208 qemu_cond_init(cpu
->halt_cond
);
1209 tcg_halt_cond
= cpu
->halt_cond
;
1210 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1212 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1213 cpu
, QEMU_THREAD_JOINABLE
);
1215 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1217 while (!cpu
->created
) {
1218 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1220 tcg_cpu_thread
= cpu
->thread
;
1222 cpu
->thread
= tcg_cpu_thread
;
1223 cpu
->halt_cond
= tcg_halt_cond
;
1227 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1229 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1231 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1232 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1233 qemu_cond_init(cpu
->halt_cond
);
1234 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1236 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1237 cpu
, QEMU_THREAD_JOINABLE
);
1238 while (!cpu
->created
) {
1239 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1243 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1245 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1247 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1248 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1249 qemu_cond_init(cpu
->halt_cond
);
1250 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1252 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1253 QEMU_THREAD_JOINABLE
);
1254 while (!cpu
->created
) {
1255 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1259 void qemu_init_vcpu(CPUState
*cpu
)
1261 cpu
->nr_cores
= smp_cores
;
1262 cpu
->nr_threads
= smp_threads
;
1263 cpu
->stopped
= true;
1264 if (kvm_enabled()) {
1265 qemu_kvm_start_vcpu(cpu
);
1266 } else if (tcg_enabled()) {
1267 qemu_tcg_init_vcpu(cpu
);
1269 qemu_dummy_start_vcpu(cpu
);
1273 void cpu_stop_current(void)
1276 current_cpu
->stop
= false;
1277 current_cpu
->stopped
= true;
1278 cpu_exit(current_cpu
);
1279 qemu_cond_signal(&qemu_pause_cond
);
1283 int vm_stop(RunState state
)
1285 if (qemu_in_vcpu_thread()) {
1286 qemu_system_vmstop_request_prepare();
1287 qemu_system_vmstop_request(state
);
1289 * FIXME: should not return to device code in case
1290 * vm_stop() has been requested.
1296 return do_vm_stop(state
);
1299 /* does a state transition even if the VM is already stopped,
1300 current state is forgotten forever */
1301 int vm_stop_force_state(RunState state
)
1303 if (runstate_is_running()) {
1304 return vm_stop(state
);
1306 runstate_set(state
);
1307 /* Make sure to return an error if the flush in a previous vm_stop()
1309 return bdrv_flush_all();
1313 static int tcg_cpu_exec(CPUArchState
*env
)
1315 CPUState
*cpu
= ENV_GET_CPU(env
);
1317 #ifdef CONFIG_PROFILER
1321 #ifdef CONFIG_PROFILER
1322 ti
= profile_getclock();
1328 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1329 + cpu
->icount_extra
);
1330 cpu
->icount_decr
.u16
.low
= 0;
1331 cpu
->icount_extra
= 0;
1332 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1334 /* Maintain prior (possibly buggy) behaviour where if no deadline
1335 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1336 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1339 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1340 deadline
= INT32_MAX
;
1343 count
= qemu_icount_round(deadline
);
1344 timers_state
.qemu_icount
+= count
;
1345 decr
= (count
> 0xffff) ? 0xffff : count
;
1347 cpu
->icount_decr
.u16
.low
= decr
;
1348 cpu
->icount_extra
= count
;
1350 ret
= cpu_exec(env
);
1351 #ifdef CONFIG_PROFILER
1352 qemu_time
+= profile_getclock() - ti
;
1355 /* Fold pending instructions back into the
1356 instruction counter, and clear the interrupt flag. */
1357 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1358 + cpu
->icount_extra
);
1359 cpu
->icount_decr
.u32
= 0;
1360 cpu
->icount_extra
= 0;
1365 static void tcg_exec_all(void)
1369 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1370 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
1372 if (next_cpu
== NULL
) {
1373 next_cpu
= first_cpu
;
1375 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= CPU_NEXT(next_cpu
)) {
1376 CPUState
*cpu
= next_cpu
;
1377 CPUArchState
*env
= cpu
->env_ptr
;
1379 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1380 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1382 if (cpu_can_run(cpu
)) {
1383 r
= tcg_cpu_exec(env
);
1384 if (r
== EXCP_DEBUG
) {
1385 cpu_handle_guest_debug(cpu
);
1388 } else if (cpu
->stop
|| cpu
->stopped
) {
1395 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1397 /* XXX: implement xxx_cpu_list for targets that still miss it */
1398 #if defined(cpu_list)
1399 cpu_list(f
, cpu_fprintf
);
1403 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1405 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1410 #if defined(TARGET_I386)
1411 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1412 CPUX86State
*env
= &x86_cpu
->env
;
1413 #elif defined(TARGET_PPC)
1414 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1415 CPUPPCState
*env
= &ppc_cpu
->env
;
1416 #elif defined(TARGET_SPARC)
1417 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1418 CPUSPARCState
*env
= &sparc_cpu
->env
;
1419 #elif defined(TARGET_MIPS)
1420 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1421 CPUMIPSState
*env
= &mips_cpu
->env
;
1422 #elif defined(TARGET_TRICORE)
1423 TriCoreCPU
*tricore_cpu
= TRICORE_CPU(cpu
);
1424 CPUTriCoreState
*env
= &tricore_cpu
->env
;
1427 cpu_synchronize_state(cpu
);
1429 info
= g_malloc0(sizeof(*info
));
1430 info
->value
= g_malloc0(sizeof(*info
->value
));
1431 info
->value
->CPU
= cpu
->cpu_index
;
1432 info
->value
->current
= (cpu
== first_cpu
);
1433 info
->value
->halted
= cpu
->halted
;
1434 info
->value
->thread_id
= cpu
->thread_id
;
1435 #if defined(TARGET_I386)
1436 info
->value
->has_pc
= true;
1437 info
->value
->pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1438 #elif defined(TARGET_PPC)
1439 info
->value
->has_nip
= true;
1440 info
->value
->nip
= env
->nip
;
1441 #elif defined(TARGET_SPARC)
1442 info
->value
->has_pc
= true;
1443 info
->value
->pc
= env
->pc
;
1444 info
->value
->has_npc
= true;
1445 info
->value
->npc
= env
->npc
;
1446 #elif defined(TARGET_MIPS)
1447 info
->value
->has_PC
= true;
1448 info
->value
->PC
= env
->active_tc
.PC
;
1449 #elif defined(TARGET_TRICORE)
1450 info
->value
->has_PC
= true;
1451 info
->value
->PC
= env
->PC
;
1454 /* XXX: waiting for the qapi to support GSList */
1456 head
= cur_item
= info
;
1458 cur_item
->next
= info
;
1466 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1467 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1478 cpu
= qemu_get_cpu(cpu_index
);
1480 error_set(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1485 f
= fopen(filename
, "wb");
1487 error_setg_file_open(errp
, errno
, filename
);
1495 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1496 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"specified", addr
);
1499 if (fwrite(buf
, 1, l
, f
) != l
) {
1500 error_set(errp
, QERR_IO_ERROR
);
1511 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1518 f
= fopen(filename
, "wb");
1520 error_setg_file_open(errp
, errno
, filename
);
1528 cpu_physical_memory_read(addr
, buf
, l
);
1529 if (fwrite(buf
, 1, l
, f
) != l
) {
1530 error_set(errp
, QERR_IO_ERROR
);
1541 void qmp_inject_nmi(Error
**errp
)
1543 #if defined(TARGET_I386)
1547 X86CPU
*cpu
= X86_CPU(cs
);
1549 if (!cpu
->apic_state
) {
1550 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1552 apic_deliver_nmi(cpu
->apic_state
);
1556 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
1560 void dump_drift_info(FILE *f
, fprintf_function cpu_fprintf
)
1566 cpu_fprintf(f
, "Host - Guest clock %"PRIi64
" ms\n",
1567 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
1568 if (icount_align_option
) {
1569 cpu_fprintf(f
, "Max guest delay %"PRIi64
" ms\n", -max_delay
/SCALE_MS
);
1570 cpu_fprintf(f
, "Max guest advance %"PRIi64
" ms\n", max_advance
/SCALE_MS
);
1572 cpu_fprintf(f
, "Max guest delay NA\n");
1573 cpu_fprintf(f
, "Max guest advance NA\n");