kvm_stat: Add kvm_exit reasons for aarch64
[qemu/ar7.git] / cpus.c
blob0cdd1d71560bec11c4bd8eed51762a72289ef9a6
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "sysemu/sysemu.h"
31 #include "exec/gdbstub.h"
32 #include "sysemu/dma.h"
33 #include "sysemu/kvm.h"
34 #include "qmp-commands.h"
36 #include "qemu/thread.h"
37 #include "sysemu/cpus.h"
38 #include "sysemu/qtest.h"
39 #include "qemu/main-loop.h"
40 #include "qemu/bitmap.h"
41 #include "qemu/seqlock.h"
42 #include "qapi-event.h"
43 #include "hw/nmi.h"
45 #ifndef _WIN32
46 #include "qemu/compatfd.h"
47 #endif
49 #ifdef CONFIG_LINUX
51 #include <sys/prctl.h>
53 #ifndef PR_MCE_KILL
54 #define PR_MCE_KILL 33
55 #endif
57 #ifndef PR_MCE_KILL_SET
58 #define PR_MCE_KILL_SET 1
59 #endif
61 #ifndef PR_MCE_KILL_EARLY
62 #define PR_MCE_KILL_EARLY 1
63 #endif
65 #endif /* CONFIG_LINUX */
67 static CPUState *next_cpu;
68 int64_t max_delay;
69 int64_t max_advance;
71 bool cpu_is_stopped(CPUState *cpu)
73 return cpu->stopped || !runstate_is_running();
76 static bool cpu_thread_is_idle(CPUState *cpu)
78 if (cpu->stop || cpu->queued_work_first) {
79 return false;
81 if (cpu_is_stopped(cpu)) {
82 return true;
84 if (!cpu->halted || cpu_has_work(cpu) ||
85 kvm_halt_in_kernel()) {
86 return false;
88 return true;
91 static bool all_cpu_threads_idle(void)
93 CPUState *cpu;
95 CPU_FOREACH(cpu) {
96 if (!cpu_thread_is_idle(cpu)) {
97 return false;
100 return true;
103 /***********************************************************/
104 /* guest cycle counter */
106 /* Protected by TimersState seqlock */
108 static int64_t vm_clock_warp_start = -1;
109 /* Conversion factor from emulated instructions to virtual clock ticks. */
110 static int icount_time_shift;
111 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
112 #define MAX_ICOUNT_SHIFT 10
114 static QEMUTimer *icount_rt_timer;
115 static QEMUTimer *icount_vm_timer;
116 static QEMUTimer *icount_warp_timer;
118 typedef struct TimersState {
119 /* Protected by BQL. */
120 int64_t cpu_ticks_prev;
121 int64_t cpu_ticks_offset;
123 /* cpu_clock_offset can be read out of BQL, so protect it with
124 * this lock.
126 QemuSeqLock vm_clock_seqlock;
127 int64_t cpu_clock_offset;
128 int32_t cpu_ticks_enabled;
129 int64_t dummy;
131 /* Compensate for varying guest execution speed. */
132 int64_t qemu_icount_bias;
133 /* Only written by TCG thread */
134 int64_t qemu_icount;
135 } TimersState;
137 static TimersState timers_state;
139 int64_t cpu_get_icount_raw(void)
141 int64_t icount;
142 CPUState *cpu = current_cpu;
144 icount = timers_state.qemu_icount;
145 if (cpu) {
146 if (!cpu_can_do_io(cpu)) {
147 fprintf(stderr, "Bad icount read\n");
148 exit(1);
150 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
152 return icount;
155 /* Return the virtual CPU time, based on the instruction counter. */
156 static int64_t cpu_get_icount_locked(void)
158 int64_t icount = cpu_get_icount_raw();
159 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
162 int64_t cpu_get_icount(void)
164 int64_t icount;
165 unsigned start;
167 do {
168 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
169 icount = cpu_get_icount_locked();
170 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
172 return icount;
175 int64_t cpu_icount_to_ns(int64_t icount)
177 return icount << icount_time_shift;
180 /* return the host CPU cycle counter and handle stop/restart */
181 /* Caller must hold the BQL */
182 int64_t cpu_get_ticks(void)
184 int64_t ticks;
186 if (use_icount) {
187 return cpu_get_icount();
190 ticks = timers_state.cpu_ticks_offset;
191 if (timers_state.cpu_ticks_enabled) {
192 ticks += cpu_get_real_ticks();
195 if (timers_state.cpu_ticks_prev > ticks) {
196 /* Note: non increasing ticks may happen if the host uses
197 software suspend */
198 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
199 ticks = timers_state.cpu_ticks_prev;
202 timers_state.cpu_ticks_prev = ticks;
203 return ticks;
206 static int64_t cpu_get_clock_locked(void)
208 int64_t ticks;
210 ticks = timers_state.cpu_clock_offset;
211 if (timers_state.cpu_ticks_enabled) {
212 ticks += get_clock();
215 return ticks;
218 /* return the host CPU monotonic timer and handle stop/restart */
219 int64_t cpu_get_clock(void)
221 int64_t ti;
222 unsigned start;
224 do {
225 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
226 ti = cpu_get_clock_locked();
227 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
229 return ti;
232 /* enable cpu_get_ticks()
233 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
235 void cpu_enable_ticks(void)
237 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
238 seqlock_write_lock(&timers_state.vm_clock_seqlock);
239 if (!timers_state.cpu_ticks_enabled) {
240 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
241 timers_state.cpu_clock_offset -= get_clock();
242 timers_state.cpu_ticks_enabled = 1;
244 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
247 /* disable cpu_get_ticks() : the clock is stopped. You must not call
248 * cpu_get_ticks() after that.
249 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
251 void cpu_disable_ticks(void)
253 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
254 seqlock_write_lock(&timers_state.vm_clock_seqlock);
255 if (timers_state.cpu_ticks_enabled) {
256 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
257 timers_state.cpu_clock_offset = cpu_get_clock_locked();
258 timers_state.cpu_ticks_enabled = 0;
260 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
263 /* Correlation between real and virtual time is always going to be
264 fairly approximate, so ignore small variation.
265 When the guest is idle real and virtual time will be aligned in
266 the IO wait loop. */
267 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
269 static void icount_adjust(void)
271 int64_t cur_time;
272 int64_t cur_icount;
273 int64_t delta;
275 /* Protected by TimersState mutex. */
276 static int64_t last_delta;
278 /* If the VM is not running, then do nothing. */
279 if (!runstate_is_running()) {
280 return;
283 seqlock_write_lock(&timers_state.vm_clock_seqlock);
284 cur_time = cpu_get_clock_locked();
285 cur_icount = cpu_get_icount_locked();
287 delta = cur_icount - cur_time;
288 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
289 if (delta > 0
290 && last_delta + ICOUNT_WOBBLE < delta * 2
291 && icount_time_shift > 0) {
292 /* The guest is getting too far ahead. Slow time down. */
293 icount_time_shift--;
295 if (delta < 0
296 && last_delta - ICOUNT_WOBBLE > delta * 2
297 && icount_time_shift < MAX_ICOUNT_SHIFT) {
298 /* The guest is getting too far behind. Speed time up. */
299 icount_time_shift++;
301 last_delta = delta;
302 timers_state.qemu_icount_bias = cur_icount
303 - (timers_state.qemu_icount << icount_time_shift);
304 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
307 static void icount_adjust_rt(void *opaque)
309 timer_mod(icount_rt_timer,
310 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
311 icount_adjust();
314 static void icount_adjust_vm(void *opaque)
316 timer_mod(icount_vm_timer,
317 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
318 get_ticks_per_sec() / 10);
319 icount_adjust();
322 static int64_t qemu_icount_round(int64_t count)
324 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
327 static void icount_warp_rt(void *opaque)
329 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
330 * changes from -1 to another value, so the race here is okay.
332 if (atomic_read(&vm_clock_warp_start) == -1) {
333 return;
336 seqlock_write_lock(&timers_state.vm_clock_seqlock);
337 if (runstate_is_running()) {
338 int64_t clock = cpu_get_clock_locked();
339 int64_t warp_delta;
341 warp_delta = clock - vm_clock_warp_start;
342 if (use_icount == 2) {
344 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
345 * far ahead of real time.
347 int64_t cur_icount = cpu_get_icount_locked();
348 int64_t delta = clock - cur_icount;
349 warp_delta = MIN(warp_delta, delta);
351 timers_state.qemu_icount_bias += warp_delta;
353 vm_clock_warp_start = -1;
354 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
356 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
357 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
361 void qtest_clock_warp(int64_t dest)
363 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
364 assert(qtest_enabled());
365 while (clock < dest) {
366 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
367 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
368 seqlock_write_lock(&timers_state.vm_clock_seqlock);
369 timers_state.qemu_icount_bias += warp;
370 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
372 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
373 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
375 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
378 void qemu_clock_warp(QEMUClockType type)
380 int64_t clock;
381 int64_t deadline;
384 * There are too many global variables to make the "warp" behavior
385 * applicable to other clocks. But a clock argument removes the
386 * need for if statements all over the place.
388 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
389 return;
393 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
394 * This ensures that the deadline for the timer is computed correctly below.
395 * This also makes sure that the insn counter is synchronized before the
396 * CPU starts running, in case the CPU is woken by an event other than
397 * the earliest QEMU_CLOCK_VIRTUAL timer.
399 icount_warp_rt(NULL);
400 timer_del(icount_warp_timer);
401 if (!all_cpu_threads_idle()) {
402 return;
405 if (qtest_enabled()) {
406 /* When testing, qtest commands advance icount. */
407 return;
410 /* We want to use the earliest deadline from ALL vm_clocks */
411 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
412 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
413 if (deadline < 0) {
414 return;
417 if (deadline > 0) {
419 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
420 * sleep. Otherwise, the CPU might be waiting for a future timer
421 * interrupt to wake it up, but the interrupt never comes because
422 * the vCPU isn't running any insns and thus doesn't advance the
423 * QEMU_CLOCK_VIRTUAL.
425 * An extreme solution for this problem would be to never let VCPUs
426 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
427 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
428 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
429 * after some "real" time, (related to the time left until the next
430 * event) has passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
431 * This avoids that the warps are visible externally; for example,
432 * you will not be sending network packets continuously instead of
433 * every 100ms.
435 seqlock_write_lock(&timers_state.vm_clock_seqlock);
436 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
437 vm_clock_warp_start = clock;
439 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
440 timer_mod_anticipate(icount_warp_timer, clock + deadline);
441 } else if (deadline == 0) {
442 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
446 static bool icount_state_needed(void *opaque)
448 return use_icount;
452 * This is a subsection for icount migration.
454 static const VMStateDescription icount_vmstate_timers = {
455 .name = "timer/icount",
456 .version_id = 1,
457 .minimum_version_id = 1,
458 .fields = (VMStateField[]) {
459 VMSTATE_INT64(qemu_icount_bias, TimersState),
460 VMSTATE_INT64(qemu_icount, TimersState),
461 VMSTATE_END_OF_LIST()
465 static const VMStateDescription vmstate_timers = {
466 .name = "timer",
467 .version_id = 2,
468 .minimum_version_id = 1,
469 .fields = (VMStateField[]) {
470 VMSTATE_INT64(cpu_ticks_offset, TimersState),
471 VMSTATE_INT64(dummy, TimersState),
472 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
473 VMSTATE_END_OF_LIST()
475 .subsections = (VMStateSubsection[]) {
477 .vmsd = &icount_vmstate_timers,
478 .needed = icount_state_needed,
479 }, {
480 /* empty */
485 void cpu_ticks_init(void)
487 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
488 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
491 void configure_icount(QemuOpts *opts, Error **errp)
493 const char *option;
494 char *rem_str = NULL;
496 option = qemu_opt_get(opts, "shift");
497 if (!option) {
498 if (qemu_opt_get(opts, "align") != NULL) {
499 error_setg(errp, "Please specify shift option when using align");
501 return;
503 icount_align_option = qemu_opt_get_bool(opts, "align", false);
504 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
505 icount_warp_rt, NULL);
506 if (strcmp(option, "auto") != 0) {
507 errno = 0;
508 icount_time_shift = strtol(option, &rem_str, 0);
509 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
510 error_setg(errp, "icount: Invalid shift value");
512 use_icount = 1;
513 return;
514 } else if (icount_align_option) {
515 error_setg(errp, "shift=auto and align=on are incompatible");
518 use_icount = 2;
520 /* 125MIPS seems a reasonable initial guess at the guest speed.
521 It will be corrected fairly quickly anyway. */
522 icount_time_shift = 3;
524 /* Have both realtime and virtual time triggers for speed adjustment.
525 The realtime trigger catches emulated time passing too slowly,
526 the virtual time trigger catches emulated time passing too fast.
527 Realtime triggers occur even when idle, so use them less frequently
528 than VM triggers. */
529 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
530 icount_adjust_rt, NULL);
531 timer_mod(icount_rt_timer,
532 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
533 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
534 icount_adjust_vm, NULL);
535 timer_mod(icount_vm_timer,
536 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
537 get_ticks_per_sec() / 10);
540 /***********************************************************/
541 void hw_error(const char *fmt, ...)
543 va_list ap;
544 CPUState *cpu;
546 va_start(ap, fmt);
547 fprintf(stderr, "qemu: hardware error: ");
548 vfprintf(stderr, fmt, ap);
549 fprintf(stderr, "\n");
550 CPU_FOREACH(cpu) {
551 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
552 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
554 va_end(ap);
555 abort();
558 void cpu_synchronize_all_states(void)
560 CPUState *cpu;
562 CPU_FOREACH(cpu) {
563 cpu_synchronize_state(cpu);
567 void cpu_synchronize_all_post_reset(void)
569 CPUState *cpu;
571 CPU_FOREACH(cpu) {
572 cpu_synchronize_post_reset(cpu);
576 void cpu_synchronize_all_post_init(void)
578 CPUState *cpu;
580 CPU_FOREACH(cpu) {
581 cpu_synchronize_post_init(cpu);
585 void cpu_clean_all_dirty(void)
587 CPUState *cpu;
589 CPU_FOREACH(cpu) {
590 cpu_clean_state(cpu);
594 static int do_vm_stop(RunState state)
596 int ret = 0;
598 if (runstate_is_running()) {
599 cpu_disable_ticks();
600 pause_all_vcpus();
601 runstate_set(state);
602 vm_state_notify(0, state);
603 qapi_event_send_stop(&error_abort);
606 bdrv_drain_all();
607 ret = bdrv_flush_all();
609 return ret;
612 static bool cpu_can_run(CPUState *cpu)
614 if (cpu->stop) {
615 return false;
617 if (cpu_is_stopped(cpu)) {
618 return false;
620 return true;
623 static void cpu_handle_guest_debug(CPUState *cpu)
625 gdb_set_stop_cpu(cpu);
626 qemu_system_debug_request();
627 cpu->stopped = true;
630 static void cpu_signal(int sig)
632 if (current_cpu) {
633 cpu_exit(current_cpu);
635 exit_request = 1;
638 #ifdef CONFIG_LINUX
639 static void sigbus_reraise(void)
641 sigset_t set;
642 struct sigaction action;
644 memset(&action, 0, sizeof(action));
645 action.sa_handler = SIG_DFL;
646 if (!sigaction(SIGBUS, &action, NULL)) {
647 raise(SIGBUS);
648 sigemptyset(&set);
649 sigaddset(&set, SIGBUS);
650 sigprocmask(SIG_UNBLOCK, &set, NULL);
652 perror("Failed to re-raise SIGBUS!\n");
653 abort();
656 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
657 void *ctx)
659 if (kvm_on_sigbus(siginfo->ssi_code,
660 (void *)(intptr_t)siginfo->ssi_addr)) {
661 sigbus_reraise();
665 static void qemu_init_sigbus(void)
667 struct sigaction action;
669 memset(&action, 0, sizeof(action));
670 action.sa_flags = SA_SIGINFO;
671 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
672 sigaction(SIGBUS, &action, NULL);
674 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
677 static void qemu_kvm_eat_signals(CPUState *cpu)
679 struct timespec ts = { 0, 0 };
680 siginfo_t siginfo;
681 sigset_t waitset;
682 sigset_t chkset;
683 int r;
685 sigemptyset(&waitset);
686 sigaddset(&waitset, SIG_IPI);
687 sigaddset(&waitset, SIGBUS);
689 do {
690 r = sigtimedwait(&waitset, &siginfo, &ts);
691 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
692 perror("sigtimedwait");
693 exit(1);
696 switch (r) {
697 case SIGBUS:
698 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
699 sigbus_reraise();
701 break;
702 default:
703 break;
706 r = sigpending(&chkset);
707 if (r == -1) {
708 perror("sigpending");
709 exit(1);
711 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
714 #else /* !CONFIG_LINUX */
/* !CONFIG_LINUX variant: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
    return;
}
720 static void qemu_kvm_eat_signals(CPUState *cpu)
723 #endif /* !CONFIG_LINUX */
725 #ifndef _WIN32
/* Signal handler that deliberately does nothing.  @sig is unused. */
static void dummy_signal(int sig)
{
    return;
}
730 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
732 int r;
733 sigset_t set;
734 struct sigaction sigact;
736 memset(&sigact, 0, sizeof(sigact));
737 sigact.sa_handler = dummy_signal;
738 sigaction(SIG_IPI, &sigact, NULL);
740 pthread_sigmask(SIG_BLOCK, NULL, &set);
741 sigdelset(&set, SIG_IPI);
742 sigdelset(&set, SIGBUS);
743 r = kvm_set_signal_mask(cpu, &set);
744 if (r) {
745 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
746 exit(1);
750 static void qemu_tcg_init_cpu_signals(void)
752 sigset_t set;
753 struct sigaction sigact;
755 memset(&sigact, 0, sizeof(sigact));
756 sigact.sa_handler = cpu_signal;
757 sigaction(SIG_IPI, &sigact, NULL);
759 sigemptyset(&set);
760 sigaddset(&set, SIG_IPI);
761 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
764 #else /* _WIN32 */
765 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
767 abort();
/* _WIN32 variant: no signal setup is performed. */
static void qemu_tcg_init_cpu_signals(void)
{
    return;
}
773 #endif /* _WIN32 */
775 static QemuMutex qemu_global_mutex;
776 static QemuCond qemu_io_proceeded_cond;
777 static bool iothread_requesting_mutex;
779 static QemuThread io_thread;
781 static QemuThread *tcg_cpu_thread;
782 static QemuCond *tcg_halt_cond;
784 /* cpu creation */
785 static QemuCond qemu_cpu_cond;
786 /* system init */
787 static QemuCond qemu_pause_cond;
788 static QemuCond qemu_work_cond;
790 void qemu_init_cpu_loop(void)
792 qemu_init_sigbus();
793 qemu_cond_init(&qemu_cpu_cond);
794 qemu_cond_init(&qemu_pause_cond);
795 qemu_cond_init(&qemu_work_cond);
796 qemu_cond_init(&qemu_io_proceeded_cond);
797 qemu_mutex_init(&qemu_global_mutex);
799 qemu_thread_get_self(&io_thread);
802 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
804 struct qemu_work_item wi;
806 if (qemu_cpu_is_self(cpu)) {
807 func(data);
808 return;
811 wi.func = func;
812 wi.data = data;
813 wi.free = false;
814 if (cpu->queued_work_first == NULL) {
815 cpu->queued_work_first = &wi;
816 } else {
817 cpu->queued_work_last->next = &wi;
819 cpu->queued_work_last = &wi;
820 wi.next = NULL;
821 wi.done = false;
823 qemu_cpu_kick(cpu);
824 while (!wi.done) {
825 CPUState *self_cpu = current_cpu;
827 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
828 current_cpu = self_cpu;
832 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
834 struct qemu_work_item *wi;
836 if (qemu_cpu_is_self(cpu)) {
837 func(data);
838 return;
841 wi = g_malloc0(sizeof(struct qemu_work_item));
842 wi->func = func;
843 wi->data = data;
844 wi->free = true;
845 if (cpu->queued_work_first == NULL) {
846 cpu->queued_work_first = wi;
847 } else {
848 cpu->queued_work_last->next = wi;
850 cpu->queued_work_last = wi;
851 wi->next = NULL;
852 wi->done = false;
854 qemu_cpu_kick(cpu);
857 static void flush_queued_work(CPUState *cpu)
859 struct qemu_work_item *wi;
861 if (cpu->queued_work_first == NULL) {
862 return;
865 while ((wi = cpu->queued_work_first)) {
866 cpu->queued_work_first = wi->next;
867 wi->func(wi->data);
868 wi->done = true;
869 if (wi->free) {
870 g_free(wi);
873 cpu->queued_work_last = NULL;
874 qemu_cond_broadcast(&qemu_work_cond);
877 static void qemu_wait_io_event_common(CPUState *cpu)
879 if (cpu->stop) {
880 cpu->stop = false;
881 cpu->stopped = true;
882 qemu_cond_signal(&qemu_pause_cond);
884 flush_queued_work(cpu);
885 cpu->thread_kicked = false;
888 static void qemu_tcg_wait_io_event(void)
890 CPUState *cpu;
892 while (all_cpu_threads_idle()) {
893 /* Start accounting real time to the virtual clock if the CPUs
894 are idle. */
895 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
896 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
899 while (iothread_requesting_mutex) {
900 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
903 CPU_FOREACH(cpu) {
904 qemu_wait_io_event_common(cpu);
908 static void qemu_kvm_wait_io_event(CPUState *cpu)
910 while (cpu_thread_is_idle(cpu)) {
911 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
914 qemu_kvm_eat_signals(cpu);
915 qemu_wait_io_event_common(cpu);
918 static void *qemu_kvm_cpu_thread_fn(void *arg)
920 CPUState *cpu = arg;
921 int r;
923 qemu_mutex_lock(&qemu_global_mutex);
924 qemu_thread_get_self(cpu->thread);
925 cpu->thread_id = qemu_get_thread_id();
926 cpu->can_do_io = 1;
927 current_cpu = cpu;
929 r = kvm_init_vcpu(cpu);
930 if (r < 0) {
931 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
932 exit(1);
935 qemu_kvm_init_cpu_signals(cpu);
937 /* signal CPU creation */
938 cpu->created = true;
939 qemu_cond_signal(&qemu_cpu_cond);
941 while (1) {
942 if (cpu_can_run(cpu)) {
943 r = kvm_cpu_exec(cpu);
944 if (r == EXCP_DEBUG) {
945 cpu_handle_guest_debug(cpu);
948 qemu_kvm_wait_io_event(cpu);
951 return NULL;
954 static void *qemu_dummy_cpu_thread_fn(void *arg)
956 #ifdef _WIN32
957 fprintf(stderr, "qtest is not supported under Windows\n");
958 exit(1);
959 #else
960 CPUState *cpu = arg;
961 sigset_t waitset;
962 int r;
964 qemu_mutex_lock_iothread();
965 qemu_thread_get_self(cpu->thread);
966 cpu->thread_id = qemu_get_thread_id();
967 cpu->can_do_io = 1;
969 sigemptyset(&waitset);
970 sigaddset(&waitset, SIG_IPI);
972 /* signal CPU creation */
973 cpu->created = true;
974 qemu_cond_signal(&qemu_cpu_cond);
976 current_cpu = cpu;
977 while (1) {
978 current_cpu = NULL;
979 qemu_mutex_unlock_iothread();
980 do {
981 int sig;
982 r = sigwait(&waitset, &sig);
983 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
984 if (r == -1) {
985 perror("sigwait");
986 exit(1);
988 qemu_mutex_lock_iothread();
989 current_cpu = cpu;
990 qemu_wait_io_event_common(cpu);
993 return NULL;
994 #endif
997 static void tcg_exec_all(void);
999 static void *qemu_tcg_cpu_thread_fn(void *arg)
1001 CPUState *cpu = arg;
1003 qemu_tcg_init_cpu_signals();
1004 qemu_thread_get_self(cpu->thread);
1006 qemu_mutex_lock(&qemu_global_mutex);
1007 CPU_FOREACH(cpu) {
1008 cpu->thread_id = qemu_get_thread_id();
1009 cpu->created = true;
1010 cpu->can_do_io = 1;
1012 qemu_cond_signal(&qemu_cpu_cond);
1014 /* wait for initial kick-off after machine start */
1015 while (QTAILQ_FIRST(&cpus)->stopped) {
1016 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
1018 /* process any pending work */
1019 CPU_FOREACH(cpu) {
1020 qemu_wait_io_event_common(cpu);
1024 while (1) {
1025 tcg_exec_all();
1027 if (use_icount) {
1028 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1030 if (deadline == 0) {
1031 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1034 qemu_tcg_wait_io_event();
1037 return NULL;
1040 static void qemu_cpu_kick_thread(CPUState *cpu)
1042 #ifndef _WIN32
1043 int err;
1045 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1046 if (err) {
1047 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1048 exit(1);
1050 #else /* _WIN32 */
1051 if (!qemu_cpu_is_self(cpu)) {
1052 CONTEXT tcgContext;
1054 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
1055 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
1056 GetLastError());
1057 exit(1);
1060 /* On multi-core systems, we are not sure that the thread is actually
1061 * suspended until we can get the context.
1063 tcgContext.ContextFlags = CONTEXT_CONTROL;
1064 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1065 continue;
1068 cpu_signal(0);
1070 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
1071 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
1072 GetLastError());
1073 exit(1);
1076 #endif
1079 void qemu_cpu_kick(CPUState *cpu)
1081 qemu_cond_broadcast(cpu->halt_cond);
1082 if (!tcg_enabled() && !cpu->thread_kicked) {
1083 qemu_cpu_kick_thread(cpu);
1084 cpu->thread_kicked = true;
1088 void qemu_cpu_kick_self(void)
1090 #ifndef _WIN32
1091 assert(current_cpu);
1093 if (!current_cpu->thread_kicked) {
1094 qemu_cpu_kick_thread(current_cpu);
1095 current_cpu->thread_kicked = true;
1097 #else
1098 abort();
1099 #endif
1102 bool qemu_cpu_is_self(CPUState *cpu)
1104 return qemu_thread_is_self(cpu->thread);
1107 static bool qemu_in_vcpu_thread(void)
1109 return current_cpu && qemu_cpu_is_self(current_cpu);
1112 void qemu_mutex_lock_iothread(void)
1114 if (!tcg_enabled()) {
1115 qemu_mutex_lock(&qemu_global_mutex);
1116 } else {
1117 iothread_requesting_mutex = true;
1118 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1119 qemu_cpu_kick_thread(first_cpu);
1120 qemu_mutex_lock(&qemu_global_mutex);
1122 iothread_requesting_mutex = false;
1123 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1127 void qemu_mutex_unlock_iothread(void)
1129 qemu_mutex_unlock(&qemu_global_mutex);
1132 static int all_vcpus_paused(void)
1134 CPUState *cpu;
1136 CPU_FOREACH(cpu) {
1137 if (!cpu->stopped) {
1138 return 0;
1142 return 1;
1145 void pause_all_vcpus(void)
1147 CPUState *cpu;
1149 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1150 CPU_FOREACH(cpu) {
1151 cpu->stop = true;
1152 qemu_cpu_kick(cpu);
1155 if (qemu_in_vcpu_thread()) {
1156 cpu_stop_current();
1157 if (!kvm_enabled()) {
1158 CPU_FOREACH(cpu) {
1159 cpu->stop = false;
1160 cpu->stopped = true;
1162 return;
1166 while (!all_vcpus_paused()) {
1167 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1168 CPU_FOREACH(cpu) {
1169 qemu_cpu_kick(cpu);
1174 void cpu_resume(CPUState *cpu)
1176 cpu->stop = false;
1177 cpu->stopped = false;
1178 qemu_cpu_kick(cpu);
1181 void resume_all_vcpus(void)
1183 CPUState *cpu;
1185 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1186 CPU_FOREACH(cpu) {
1187 cpu_resume(cpu);
1191 /* For temporary buffers for forming a name */
1192 #define VCPU_THREAD_NAME_SIZE 16
1194 static void qemu_tcg_init_vcpu(CPUState *cpu)
1196 char thread_name[VCPU_THREAD_NAME_SIZE];
1198 tcg_cpu_address_space_init(cpu, cpu->as);
1200 /* share a single thread for all cpus with TCG */
1201 if (!tcg_cpu_thread) {
1202 cpu->thread = g_malloc0(sizeof(QemuThread));
1203 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1204 qemu_cond_init(cpu->halt_cond);
1205 tcg_halt_cond = cpu->halt_cond;
1206 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1207 cpu->cpu_index);
1208 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1209 cpu, QEMU_THREAD_JOINABLE);
1210 #ifdef _WIN32
1211 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1212 #endif
1213 while (!cpu->created) {
1214 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1216 tcg_cpu_thread = cpu->thread;
1217 } else {
1218 cpu->thread = tcg_cpu_thread;
1219 cpu->halt_cond = tcg_halt_cond;
1223 static void qemu_kvm_start_vcpu(CPUState *cpu)
1225 char thread_name[VCPU_THREAD_NAME_SIZE];
1227 cpu->thread = g_malloc0(sizeof(QemuThread));
1228 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1229 qemu_cond_init(cpu->halt_cond);
1230 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1231 cpu->cpu_index);
1232 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1233 cpu, QEMU_THREAD_JOINABLE);
1234 while (!cpu->created) {
1235 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1239 static void qemu_dummy_start_vcpu(CPUState *cpu)
1241 char thread_name[VCPU_THREAD_NAME_SIZE];
1243 cpu->thread = g_malloc0(sizeof(QemuThread));
1244 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1245 qemu_cond_init(cpu->halt_cond);
1246 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1247 cpu->cpu_index);
1248 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1249 QEMU_THREAD_JOINABLE);
1250 while (!cpu->created) {
1251 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1255 void qemu_init_vcpu(CPUState *cpu)
1257 cpu->nr_cores = smp_cores;
1258 cpu->nr_threads = smp_threads;
1259 cpu->stopped = true;
1260 if (kvm_enabled()) {
1261 qemu_kvm_start_vcpu(cpu);
1262 } else if (tcg_enabled()) {
1263 qemu_tcg_init_vcpu(cpu);
1264 } else {
1265 qemu_dummy_start_vcpu(cpu);
1269 void cpu_stop_current(void)
1271 if (current_cpu) {
1272 current_cpu->stop = false;
1273 current_cpu->stopped = true;
1274 cpu_exit(current_cpu);
1275 qemu_cond_signal(&qemu_pause_cond);
1279 int vm_stop(RunState state)
1281 if (qemu_in_vcpu_thread()) {
1282 qemu_system_vmstop_request_prepare();
1283 qemu_system_vmstop_request(state);
1285 * FIXME: should not return to device code in case
1286 * vm_stop() has been requested.
1288 cpu_stop_current();
1289 return 0;
1292 return do_vm_stop(state);
1295 /* does a state transition even if the VM is already stopped,
1296 current state is forgotten forever */
1297 int vm_stop_force_state(RunState state)
1299 if (runstate_is_running()) {
1300 return vm_stop(state);
1301 } else {
1302 runstate_set(state);
1303 /* Make sure to return an error if the flush in a previous vm_stop()
1304 * failed. */
1305 return bdrv_flush_all();
1309 static int tcg_cpu_exec(CPUArchState *env)
1311 CPUState *cpu = ENV_GET_CPU(env);
1312 int ret;
1313 #ifdef CONFIG_PROFILER
1314 int64_t ti;
1315 #endif
1317 #ifdef CONFIG_PROFILER
1318 ti = profile_getclock();
1319 #endif
1320 if (use_icount) {
1321 int64_t count;
1322 int64_t deadline;
1323 int decr;
1324 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1325 + cpu->icount_extra);
1326 cpu->icount_decr.u16.low = 0;
1327 cpu->icount_extra = 0;
1328 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1330 /* Maintain prior (possibly buggy) behaviour where if no deadline
1331 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1332 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1333 * nanoseconds.
1335 if ((deadline < 0) || (deadline > INT32_MAX)) {
1336 deadline = INT32_MAX;
1339 count = qemu_icount_round(deadline);
1340 timers_state.qemu_icount += count;
1341 decr = (count > 0xffff) ? 0xffff : count;
1342 count -= decr;
1343 cpu->icount_decr.u16.low = decr;
1344 cpu->icount_extra = count;
1346 ret = cpu_exec(env);
1347 #ifdef CONFIG_PROFILER
1348 qemu_time += profile_getclock() - ti;
1349 #endif
1350 if (use_icount) {
1351 /* Fold pending instructions back into the
1352 instruction counter, and clear the interrupt flag. */
1353 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1354 + cpu->icount_extra);
1355 cpu->icount_decr.u32 = 0;
1356 cpu->icount_extra = 0;
1358 return ret;
1361 static void tcg_exec_all(void)
1363 int r;
1365 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1366 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
1368 if (next_cpu == NULL) {
1369 next_cpu = first_cpu;
1371 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1372 CPUState *cpu = next_cpu;
1373 CPUArchState *env = cpu->env_ptr;
1375 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1376 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1378 if (cpu_can_run(cpu)) {
1379 r = tcg_cpu_exec(env);
1380 if (r == EXCP_DEBUG) {
1381 cpu_handle_guest_debug(cpu);
1382 break;
1384 } else if (cpu->stop || cpu->stopped) {
1385 break;
1388 exit_request = 0;
1391 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1393 /* XXX: implement xxx_cpu_list for targets that still miss it */
1394 #if defined(cpu_list)
1395 cpu_list(f, cpu_fprintf);
1396 #endif
1399 CpuInfoList *qmp_query_cpus(Error **errp)
1401 CpuInfoList *head = NULL, *cur_item = NULL;
1402 CPUState *cpu;
1404 CPU_FOREACH(cpu) {
1405 CpuInfoList *info;
1406 #if defined(TARGET_I386)
1407 X86CPU *x86_cpu = X86_CPU(cpu);
1408 CPUX86State *env = &x86_cpu->env;
1409 #elif defined(TARGET_PPC)
1410 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1411 CPUPPCState *env = &ppc_cpu->env;
1412 #elif defined(TARGET_SPARC)
1413 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1414 CPUSPARCState *env = &sparc_cpu->env;
1415 #elif defined(TARGET_MIPS)
1416 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1417 CPUMIPSState *env = &mips_cpu->env;
1418 #elif defined(TARGET_TRICORE)
1419 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1420 CPUTriCoreState *env = &tricore_cpu->env;
1421 #endif
1423 cpu_synchronize_state(cpu);
1425 info = g_malloc0(sizeof(*info));
1426 info->value = g_malloc0(sizeof(*info->value));
1427 info->value->CPU = cpu->cpu_index;
1428 info->value->current = (cpu == first_cpu);
1429 info->value->halted = cpu->halted;
1430 info->value->thread_id = cpu->thread_id;
1431 #if defined(TARGET_I386)
1432 info->value->has_pc = true;
1433 info->value->pc = env->eip + env->segs[R_CS].base;
1434 #elif defined(TARGET_PPC)
1435 info->value->has_nip = true;
1436 info->value->nip = env->nip;
1437 #elif defined(TARGET_SPARC)
1438 info->value->has_pc = true;
1439 info->value->pc = env->pc;
1440 info->value->has_npc = true;
1441 info->value->npc = env->npc;
1442 #elif defined(TARGET_MIPS)
1443 info->value->has_PC = true;
1444 info->value->PC = env->active_tc.PC;
1445 #elif defined(TARGET_TRICORE)
1446 info->value->has_PC = true;
1447 info->value->PC = env->PC;
1448 #endif
1450 /* XXX: waiting for the qapi to support GSList */
1451 if (!cur_item) {
1452 head = cur_item = info;
1453 } else {
1454 cur_item->next = info;
1455 cur_item = info;
1459 return head;
1462 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1463 bool has_cpu, int64_t cpu_index, Error **errp)
1465 FILE *f;
1466 uint32_t l;
1467 CPUState *cpu;
1468 uint8_t buf[1024];
1470 if (!has_cpu) {
1471 cpu_index = 0;
1474 cpu = qemu_get_cpu(cpu_index);
1475 if (cpu == NULL) {
1476 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1477 "a CPU number");
1478 return;
1481 f = fopen(filename, "wb");
1482 if (!f) {
1483 error_setg_file_open(errp, errno, filename);
1484 return;
1487 while (size != 0) {
1488 l = sizeof(buf);
1489 if (l > size)
1490 l = size;
1491 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1492 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1493 goto exit;
1495 if (fwrite(buf, 1, l, f) != l) {
1496 error_set(errp, QERR_IO_ERROR);
1497 goto exit;
1499 addr += l;
1500 size -= l;
1503 exit:
1504 fclose(f);
1507 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1508 Error **errp)
1510 FILE *f;
1511 uint32_t l;
1512 uint8_t buf[1024];
1514 f = fopen(filename, "wb");
1515 if (!f) {
1516 error_setg_file_open(errp, errno, filename);
1517 return;
1520 while (size != 0) {
1521 l = sizeof(buf);
1522 if (l > size)
1523 l = size;
1524 cpu_physical_memory_read(addr, buf, l);
1525 if (fwrite(buf, 1, l, f) != l) {
1526 error_set(errp, QERR_IO_ERROR);
1527 goto exit;
1529 addr += l;
1530 size -= l;
1533 exit:
1534 fclose(f);
1537 void qmp_inject_nmi(Error **errp)
1539 #if defined(TARGET_I386)
1540 CPUState *cs;
1542 CPU_FOREACH(cs) {
1543 X86CPU *cpu = X86_CPU(cs);
1545 if (!cpu->apic_state) {
1546 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1547 } else {
1548 apic_deliver_nmi(cpu->apic_state);
1551 #else
1552 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1553 #endif
1556 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1558 if (!use_icount) {
1559 return;
1562 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1563 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1564 if (icount_align_option) {
1565 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1566 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1567 } else {
1568 cpu_fprintf(f, "Max guest delay NA\n");
1569 cpu_fprintf(f, "Max guest advance NA\n");