spapr/xive: activate KVM support
[qemu.git] / cpus.c
blobffc57119ca5ed52603ce9a67929b4483c5cc7b56
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "qemu/osdep.h"
26 #include "qemu/config-file.h"
27 #include "cpu.h"
28 #include "monitor/monitor.h"
29 #include "qapi/error.h"
30 #include "qapi/qapi-commands-misc.h"
31 #include "qapi/qapi-events-run-state.h"
32 #include "qapi/qmp/qerror.h"
33 #include "qemu/error-report.h"
34 #include "qemu/qemu-print.h"
35 #include "sysemu/sysemu.h"
36 #include "sysemu/block-backend.h"
37 #include "exec/gdbstub.h"
38 #include "sysemu/dma.h"
39 #include "sysemu/hw_accel.h"
40 #include "sysemu/kvm.h"
41 #include "sysemu/hax.h"
42 #include "sysemu/hvf.h"
43 #include "sysemu/whpx.h"
44 #include "exec/exec-all.h"
46 #include "qemu/thread.h"
47 #include "sysemu/cpus.h"
48 #include "sysemu/qtest.h"
49 #include "qemu/main-loop.h"
50 #include "qemu/option.h"
51 #include "qemu/bitmap.h"
52 #include "qemu/seqlock.h"
53 #include "qemu/guest-random.h"
54 #include "tcg.h"
55 #include "hw/nmi.h"
56 #include "sysemu/replay.h"
57 #include "hw/boards.h"
59 #ifdef CONFIG_LINUX
61 #include <sys/prctl.h>
63 #ifndef PR_MCE_KILL
64 #define PR_MCE_KILL 33
65 #endif
67 #ifndef PR_MCE_KILL_SET
68 #define PR_MCE_KILL_SET 1
69 #endif
71 #ifndef PR_MCE_KILL_EARLY
72 #define PR_MCE_KILL_EARLY 1
73 #endif
75 #endif /* CONFIG_LINUX */
77 int64_t max_delay;
78 int64_t max_advance;
80 /* vcpu throttling controls */
81 static QEMUTimer *throttle_timer;
82 static unsigned int throttle_percentage;
84 #define CPU_THROTTLE_PCT_MIN 1
85 #define CPU_THROTTLE_PCT_MAX 99
86 #define CPU_THROTTLE_TIMESLICE_NS 10000000
88 bool cpu_is_stopped(CPUState *cpu)
90 return cpu->stopped || !runstate_is_running();
93 static bool cpu_thread_is_idle(CPUState *cpu)
95 if (cpu->stop || cpu->queued_work_first) {
96 return false;
98 if (cpu_is_stopped(cpu)) {
99 return true;
101 if (!cpu->halted || cpu_has_work(cpu) ||
102 kvm_halt_in_kernel()) {
103 return false;
105 return true;
108 static bool all_cpu_threads_idle(void)
110 CPUState *cpu;
112 CPU_FOREACH(cpu) {
113 if (!cpu_thread_is_idle(cpu)) {
114 return false;
117 return true;
/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

/* Value of the icount "sleep" option; assigned in configure_icount(). */
static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10
129 typedef struct TimersState {
130 /* Protected by BQL. */
131 int64_t cpu_ticks_prev;
132 int64_t cpu_ticks_offset;
134 /* Protect fields that can be respectively read outside the
135 * BQL, and written from multiple threads.
137 QemuSeqLock vm_clock_seqlock;
138 QemuSpin vm_clock_lock;
140 int16_t cpu_ticks_enabled;
142 /* Conversion factor from emulated instructions to virtual clock ticks. */
143 int16_t icount_time_shift;
145 /* Compensate for varying guest execution speed. */
146 int64_t qemu_icount_bias;
148 int64_t vm_clock_warp_start;
149 int64_t cpu_clock_offset;
151 /* Only written by TCG thread */
152 int64_t qemu_icount;
154 /* for adjusting icount */
155 QEMUTimer *icount_rt_timer;
156 QEMUTimer *icount_vm_timer;
157 QEMUTimer *icount_warp_timer;
158 } TimersState;
160 static TimersState timers_state;
161 bool mttcg_enabled;
/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *   - atomic instructions
 *   - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

/*
 * Return true when every ordering bit the guest asks for is also
 * provided by the host.  If either default is not defined, report
 * incompatibility.
 */
static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    /* Bits required by the guest that the host does not guarantee. */
    const bool host_covers_guest =
        (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;

    return host_covers_guest;
#else
    return false;
#endif
}
192 static bool default_mttcg_enabled(void)
194 if (use_icount || TCG_OVERSIZED_GUEST) {
195 return false;
196 } else {
197 #ifdef TARGET_SUPPORTS_MTTCG
198 return check_tcg_memory_orders_compatible();
199 #else
200 return false;
201 #endif
205 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
207 const char *t = qemu_opt_get(opts, "thread");
208 if (t) {
209 if (strcmp(t, "multi") == 0) {
210 if (TCG_OVERSIZED_GUEST) {
211 error_setg(errp, "No MTTCG when guest word size > hosts");
212 } else if (use_icount) {
213 error_setg(errp, "No MTTCG when icount is enabled");
214 } else {
215 #ifndef TARGET_SUPPORTS_MTTCG
216 warn_report("Guest not yet converted to MTTCG - "
217 "you may get unexpected results");
218 #endif
219 if (!check_tcg_memory_orders_compatible()) {
220 warn_report("Guest expects a stronger memory ordering "
221 "than the host provides");
222 error_printf("This may cause strange/hard to debug errors\n");
224 mttcg_enabled = true;
226 } else if (strcmp(t, "single") == 0) {
227 mttcg_enabled = false;
228 } else {
229 error_setg(errp, "Invalid 'thread' setting %s", t);
231 } else {
232 mttcg_enabled = default_mttcg_enabled();
236 /* The current number of executed instructions is based on what we
237 * originally budgeted minus the current state of the decrementing
238 * icount counters in extra/u16.low.
240 static int64_t cpu_get_icount_executed(CPUState *cpu)
242 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
246 * Update the global shared timer_state.qemu_icount to take into
247 * account executed instructions. This is done by the TCG vCPU
248 * thread so the main-loop can see time has moved forward.
250 static void cpu_update_icount_locked(CPUState *cpu)
252 int64_t executed = cpu_get_icount_executed(cpu);
253 cpu->icount_budget -= executed;
255 atomic_set_i64(&timers_state.qemu_icount,
256 timers_state.qemu_icount + executed);
260 * Update the global shared timer_state.qemu_icount to take into
261 * account executed instructions. This is done by the TCG vCPU
262 * thread so the main-loop can see time has moved forward.
264 void cpu_update_icount(CPUState *cpu)
266 seqlock_write_lock(&timers_state.vm_clock_seqlock,
267 &timers_state.vm_clock_lock);
268 cpu_update_icount_locked(cpu);
269 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
270 &timers_state.vm_clock_lock);
273 static int64_t cpu_get_icount_raw_locked(void)
275 CPUState *cpu = current_cpu;
277 if (cpu && cpu->running) {
278 if (!cpu->can_do_io) {
279 error_report("Bad icount read");
280 exit(1);
282 /* Take into account what has run */
283 cpu_update_icount_locked(cpu);
285 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
286 return atomic_read_i64(&timers_state.qemu_icount);
289 static int64_t cpu_get_icount_locked(void)
291 int64_t icount = cpu_get_icount_raw_locked();
292 return atomic_read_i64(&timers_state.qemu_icount_bias) +
293 cpu_icount_to_ns(icount);
296 int64_t cpu_get_icount_raw(void)
298 int64_t icount;
299 unsigned start;
301 do {
302 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
303 icount = cpu_get_icount_raw_locked();
304 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
306 return icount;
309 /* Return the virtual CPU time, based on the instruction counter. */
310 int64_t cpu_get_icount(void)
312 int64_t icount;
313 unsigned start;
315 do {
316 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
317 icount = cpu_get_icount_locked();
318 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
320 return icount;
323 int64_t cpu_icount_to_ns(int64_t icount)
325 return icount << atomic_read(&timers_state.icount_time_shift);
328 static int64_t cpu_get_ticks_locked(void)
330 int64_t ticks = timers_state.cpu_ticks_offset;
331 if (timers_state.cpu_ticks_enabled) {
332 ticks += cpu_get_host_ticks();
335 if (timers_state.cpu_ticks_prev > ticks) {
336 /* Non increasing ticks may happen if the host uses software suspend. */
337 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
338 ticks = timers_state.cpu_ticks_prev;
341 timers_state.cpu_ticks_prev = ticks;
342 return ticks;
345 /* return the time elapsed in VM between vm_start and vm_stop. Unless
346 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
347 * counter.
349 int64_t cpu_get_ticks(void)
351 int64_t ticks;
353 if (use_icount) {
354 return cpu_get_icount();
357 qemu_spin_lock(&timers_state.vm_clock_lock);
358 ticks = cpu_get_ticks_locked();
359 qemu_spin_unlock(&timers_state.vm_clock_lock);
360 return ticks;
363 static int64_t cpu_get_clock_locked(void)
365 int64_t time;
367 time = timers_state.cpu_clock_offset;
368 if (timers_state.cpu_ticks_enabled) {
369 time += get_clock();
372 return time;
375 /* Return the monotonic time elapsed in VM, i.e.,
376 * the time between vm_start and vm_stop
378 int64_t cpu_get_clock(void)
380 int64_t ti;
381 unsigned start;
383 do {
384 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
385 ti = cpu_get_clock_locked();
386 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
388 return ti;
391 /* enable cpu_get_ticks()
392 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
394 void cpu_enable_ticks(void)
396 seqlock_write_lock(&timers_state.vm_clock_seqlock,
397 &timers_state.vm_clock_lock);
398 if (!timers_state.cpu_ticks_enabled) {
399 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
400 timers_state.cpu_clock_offset -= get_clock();
401 timers_state.cpu_ticks_enabled = 1;
403 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
404 &timers_state.vm_clock_lock);
407 /* disable cpu_get_ticks() : the clock is stopped. You must not call
408 * cpu_get_ticks() after that.
409 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
411 void cpu_disable_ticks(void)
413 seqlock_write_lock(&timers_state.vm_clock_seqlock,
414 &timers_state.vm_clock_lock);
415 if (timers_state.cpu_ticks_enabled) {
416 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
417 timers_state.cpu_clock_offset = cpu_get_clock_locked();
418 timers_state.cpu_ticks_enabled = 0;
420 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
421 &timers_state.vm_clock_lock);
/*
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so ignore small variation.
 * When the guest is idle real and virtual time will be aligned in
 * the IO wait loop.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
430 static void icount_adjust(void)
432 int64_t cur_time;
433 int64_t cur_icount;
434 int64_t delta;
436 /* Protected by TimersState mutex. */
437 static int64_t last_delta;
439 /* If the VM is not running, then do nothing. */
440 if (!runstate_is_running()) {
441 return;
444 seqlock_write_lock(&timers_state.vm_clock_seqlock,
445 &timers_state.vm_clock_lock);
446 cur_time = cpu_get_clock_locked();
447 cur_icount = cpu_get_icount_locked();
449 delta = cur_icount - cur_time;
450 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
451 if (delta > 0
452 && last_delta + ICOUNT_WOBBLE < delta * 2
453 && timers_state.icount_time_shift > 0) {
454 /* The guest is getting too far ahead. Slow time down. */
455 atomic_set(&timers_state.icount_time_shift,
456 timers_state.icount_time_shift - 1);
458 if (delta < 0
459 && last_delta - ICOUNT_WOBBLE > delta * 2
460 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
461 /* The guest is getting too far behind. Speed time up. */
462 atomic_set(&timers_state.icount_time_shift,
463 timers_state.icount_time_shift + 1);
465 last_delta = delta;
466 atomic_set_i64(&timers_state.qemu_icount_bias,
467 cur_icount - (timers_state.qemu_icount
468 << timers_state.icount_time_shift));
469 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
470 &timers_state.vm_clock_lock);
473 static void icount_adjust_rt(void *opaque)
475 timer_mod(timers_state.icount_rt_timer,
476 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
477 icount_adjust();
480 static void icount_adjust_vm(void *opaque)
482 timer_mod(timers_state.icount_vm_timer,
483 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
484 NANOSECONDS_PER_SECOND / 10);
485 icount_adjust();
488 static int64_t qemu_icount_round(int64_t count)
490 int shift = atomic_read(&timers_state.icount_time_shift);
491 return (count + (1 << shift) - 1) >> shift;
494 static void icount_warp_rt(void)
496 unsigned seq;
497 int64_t warp_start;
499 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
500 * changes from -1 to another value, so the race here is okay.
502 do {
503 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
504 warp_start = timers_state.vm_clock_warp_start;
505 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
507 if (warp_start == -1) {
508 return;
511 seqlock_write_lock(&timers_state.vm_clock_seqlock,
512 &timers_state.vm_clock_lock);
513 if (runstate_is_running()) {
514 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
515 cpu_get_clock_locked());
516 int64_t warp_delta;
518 warp_delta = clock - timers_state.vm_clock_warp_start;
519 if (use_icount == 2) {
521 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
522 * far ahead of real time.
524 int64_t cur_icount = cpu_get_icount_locked();
525 int64_t delta = clock - cur_icount;
526 warp_delta = MIN(warp_delta, delta);
528 atomic_set_i64(&timers_state.qemu_icount_bias,
529 timers_state.qemu_icount_bias + warp_delta);
531 timers_state.vm_clock_warp_start = -1;
532 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
533 &timers_state.vm_clock_lock);
535 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
536 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
/* Callback of icount_warp_timer; simply accounts the pending warp. */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
548 void qtest_clock_warp(int64_t dest)
550 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
551 AioContext *aio_context;
552 assert(qtest_enabled());
553 aio_context = qemu_get_aio_context();
554 while (clock < dest) {
555 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
556 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
558 seqlock_write_lock(&timers_state.vm_clock_seqlock,
559 &timers_state.vm_clock_lock);
560 atomic_set_i64(&timers_state.qemu_icount_bias,
561 timers_state.qemu_icount_bias + warp);
562 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
563 &timers_state.vm_clock_lock);
565 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
566 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
567 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
569 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
572 void qemu_start_warp_timer(void)
574 int64_t clock;
575 int64_t deadline;
577 if (!use_icount) {
578 return;
581 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
582 * do not fire, so computing the deadline does not make sense.
584 if (!runstate_is_running()) {
585 return;
588 if (replay_mode != REPLAY_MODE_PLAY) {
589 if (!all_cpu_threads_idle()) {
590 return;
593 if (qtest_enabled()) {
594 /* When testing, qtest commands advance icount. */
595 return;
598 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
599 } else {
600 /* warp clock deterministically in record/replay mode */
601 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
602 /* vCPU is sleeping and warp can't be started.
603 It is probably a race condition: notification sent
604 to vCPU was processed in advance and vCPU went to sleep.
605 Therefore we have to wake it up for doing someting. */
606 if (replay_has_checkpoint()) {
607 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
609 return;
613 /* We want to use the earliest deadline from ALL vm_clocks */
614 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
615 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
616 if (deadline < 0) {
617 static bool notified;
618 if (!icount_sleep && !notified) {
619 warn_report("icount sleep disabled and no active timers");
620 notified = true;
622 return;
625 if (deadline > 0) {
627 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
628 * sleep. Otherwise, the CPU might be waiting for a future timer
629 * interrupt to wake it up, but the interrupt never comes because
630 * the vCPU isn't running any insns and thus doesn't advance the
631 * QEMU_CLOCK_VIRTUAL.
633 if (!icount_sleep) {
635 * We never let VCPUs sleep in no sleep icount mode.
636 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
637 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
638 * It is useful when we want a deterministic execution time,
639 * isolated from host latencies.
641 seqlock_write_lock(&timers_state.vm_clock_seqlock,
642 &timers_state.vm_clock_lock);
643 atomic_set_i64(&timers_state.qemu_icount_bias,
644 timers_state.qemu_icount_bias + deadline);
645 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
646 &timers_state.vm_clock_lock);
647 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
648 } else {
650 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
651 * "real" time, (related to the time left until the next event) has
652 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
653 * This avoids that the warps are visible externally; for example,
654 * you will not be sending network packets continuously instead of
655 * every 100ms.
657 seqlock_write_lock(&timers_state.vm_clock_seqlock,
658 &timers_state.vm_clock_lock);
659 if (timers_state.vm_clock_warp_start == -1
660 || timers_state.vm_clock_warp_start > clock) {
661 timers_state.vm_clock_warp_start = clock;
663 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
664 &timers_state.vm_clock_lock);
665 timer_mod_anticipate(timers_state.icount_warp_timer,
666 clock + deadline);
668 } else if (deadline == 0) {
669 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
673 static void qemu_account_warp_timer(void)
675 if (!use_icount || !icount_sleep) {
676 return;
679 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
680 * do not fire, so computing the deadline does not make sense.
682 if (!runstate_is_running()) {
683 return;
686 /* warp clock deterministically in record/replay mode */
687 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
688 return;
691 timer_del(timers_state.icount_warp_timer);
692 icount_warp_rt();
695 static bool icount_state_needed(void *opaque)
697 return use_icount;
700 static bool warp_timer_state_needed(void *opaque)
702 TimersState *s = opaque;
703 return s->icount_warp_timer != NULL;
706 static bool adjust_timers_state_needed(void *opaque)
708 TimersState *s = opaque;
709 return s->icount_rt_timer != NULL;
713 * Subsection for warp timer migration is optional, because may not be created
715 static const VMStateDescription icount_vmstate_warp_timer = {
716 .name = "timer/icount/warp_timer",
717 .version_id = 1,
718 .minimum_version_id = 1,
719 .needed = warp_timer_state_needed,
720 .fields = (VMStateField[]) {
721 VMSTATE_INT64(vm_clock_warp_start, TimersState),
722 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
723 VMSTATE_END_OF_LIST()
727 static const VMStateDescription icount_vmstate_adjust_timers = {
728 .name = "timer/icount/timers",
729 .version_id = 1,
730 .minimum_version_id = 1,
731 .needed = adjust_timers_state_needed,
732 .fields = (VMStateField[]) {
733 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
734 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
735 VMSTATE_END_OF_LIST()
740 * This is a subsection for icount migration.
742 static const VMStateDescription icount_vmstate_timers = {
743 .name = "timer/icount",
744 .version_id = 1,
745 .minimum_version_id = 1,
746 .needed = icount_state_needed,
747 .fields = (VMStateField[]) {
748 VMSTATE_INT64(qemu_icount_bias, TimersState),
749 VMSTATE_INT64(qemu_icount, TimersState),
750 VMSTATE_END_OF_LIST()
752 .subsections = (const VMStateDescription*[]) {
753 &icount_vmstate_warp_timer,
754 &icount_vmstate_adjust_timers,
755 NULL
759 static const VMStateDescription vmstate_timers = {
760 .name = "timer",
761 .version_id = 2,
762 .minimum_version_id = 1,
763 .fields = (VMStateField[]) {
764 VMSTATE_INT64(cpu_ticks_offset, TimersState),
765 VMSTATE_UNUSED(8),
766 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
767 VMSTATE_END_OF_LIST()
769 .subsections = (const VMStateDescription*[]) {
770 &icount_vmstate_timers,
771 NULL
775 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
777 double pct;
778 double throttle_ratio;
779 long sleeptime_ns;
781 if (!cpu_throttle_get_percentage()) {
782 return;
785 pct = (double)cpu_throttle_get_percentage()/100;
786 throttle_ratio = pct / (1 - pct);
787 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
789 qemu_mutex_unlock_iothread();
790 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
791 qemu_mutex_lock_iothread();
792 atomic_set(&cpu->throttle_thread_scheduled, 0);
795 static void cpu_throttle_timer_tick(void *opaque)
797 CPUState *cpu;
798 double pct;
800 /* Stop the timer if needed */
801 if (!cpu_throttle_get_percentage()) {
802 return;
804 CPU_FOREACH(cpu) {
805 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
806 async_run_on_cpu(cpu, cpu_throttle_thread,
807 RUN_ON_CPU_NULL);
811 pct = (double)cpu_throttle_get_percentage()/100;
812 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
813 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
816 void cpu_throttle_set(int new_throttle_pct)
818 /* Ensure throttle percentage is within valid range */
819 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
820 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
822 atomic_set(&throttle_percentage, new_throttle_pct);
824 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
825 CPU_THROTTLE_TIMESLICE_NS);
828 void cpu_throttle_stop(void)
830 atomic_set(&throttle_percentage, 0);
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    const int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
838 int cpu_throttle_get_percentage(void)
840 return atomic_read(&throttle_percentage);
843 void cpu_ticks_init(void)
845 seqlock_init(&timers_state.vm_clock_seqlock);
846 qemu_spin_init(&timers_state.vm_clock_lock);
847 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
848 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
849 cpu_throttle_timer_tick, NULL);
852 void configure_icount(QemuOpts *opts, Error **errp)
854 const char *option;
855 char *rem_str = NULL;
857 option = qemu_opt_get(opts, "shift");
858 if (!option) {
859 if (qemu_opt_get(opts, "align") != NULL) {
860 error_setg(errp, "Please specify shift option when using align");
862 return;
865 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
866 if (icount_sleep) {
867 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
868 icount_timer_cb, NULL);
871 icount_align_option = qemu_opt_get_bool(opts, "align", false);
873 if (icount_align_option && !icount_sleep) {
874 error_setg(errp, "align=on and sleep=off are incompatible");
876 if (strcmp(option, "auto") != 0) {
877 errno = 0;
878 timers_state.icount_time_shift = strtol(option, &rem_str, 0);
879 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
880 error_setg(errp, "icount: Invalid shift value");
882 use_icount = 1;
883 return;
884 } else if (icount_align_option) {
885 error_setg(errp, "shift=auto and align=on are incompatible");
886 } else if (!icount_sleep) {
887 error_setg(errp, "shift=auto and sleep=off are incompatible");
890 use_icount = 2;
892 /* 125MIPS seems a reasonable initial guess at the guest speed.
893 It will be corrected fairly quickly anyway. */
894 timers_state.icount_time_shift = 3;
896 /* Have both realtime and virtual time triggers for speed adjustment.
897 The realtime trigger catches emulated time passing too slowly,
898 the virtual time trigger catches emulated time passing too fast.
899 Realtime triggers occur even when idle, so use them less frequently
900 than VM triggers. */
901 timers_state.vm_clock_warp_start = -1;
902 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
903 icount_adjust_rt, NULL);
904 timer_mod(timers_state.icount_rt_timer,
905 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
906 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
907 icount_adjust_vm, NULL);
908 timer_mod(timers_state.icount_vm_timer,
909 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
910 NANOSECONDS_PER_SECOND / 10);
913 /***********************************************************/
914 /* TCG vCPU kick timer
916 * The kick timer is responsible for moving single threaded vCPU
917 * emulation on to the next vCPU. If more than one vCPU is running a
918 * timer event with force a cpu->exit so the next vCPU can get
919 * scheduled.
921 * The timer is removed if all vCPUs are idle and restarted again once
922 * idleness is complete.
925 static QEMUTimer *tcg_kick_vcpu_timer;
926 static CPUState *tcg_current_rr_cpu;
928 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
930 static inline int64_t qemu_tcg_next_kick(void)
932 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
935 /* Kick the currently round-robin scheduled vCPU */
936 static void qemu_cpu_kick_rr_cpu(void)
938 CPUState *cpu;
939 do {
940 cpu = atomic_mb_read(&tcg_current_rr_cpu);
941 if (cpu) {
942 cpu_exit(cpu);
944 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
947 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
951 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
953 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
954 qemu_notify_event();
955 return;
958 if (qemu_in_vcpu_thread()) {
959 /* A CPU is currently running; kick it back out to the
960 * tcg_cpu_exec() loop so it will recalculate its
961 * icount deadline immediately.
963 qemu_cpu_kick(current_cpu);
964 } else if (first_cpu) {
965 /* qemu_cpu_kick is not enough to kick a halted CPU out of
966 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
967 * causes cpu_thread_is_idle to return false. This way,
968 * handle_icount_deadline can run.
969 * If we have no CPUs at all for some reason, we don't
970 * need to do anything.
972 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
976 static void kick_tcg_thread(void *opaque)
978 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
979 qemu_cpu_kick_rr_cpu();
982 static void start_tcg_kick_timer(void)
984 assert(!mttcg_enabled);
985 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
986 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
987 kick_tcg_thread, NULL);
989 if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
990 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
994 static void stop_tcg_kick_timer(void)
996 assert(!mttcg_enabled);
997 if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
998 timer_del(tcg_kick_vcpu_timer);
1002 /***********************************************************/
1003 void hw_error(const char *fmt, ...)
1005 va_list ap;
1006 CPUState *cpu;
1008 va_start(ap, fmt);
1009 fprintf(stderr, "qemu: hardware error: ");
1010 vfprintf(stderr, fmt, ap);
1011 fprintf(stderr, "\n");
1012 CPU_FOREACH(cpu) {
1013 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
1014 cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
1016 va_end(ap);
1017 abort();
1020 void cpu_synchronize_all_states(void)
1022 CPUState *cpu;
1024 CPU_FOREACH(cpu) {
1025 cpu_synchronize_state(cpu);
1026 /* TODO: move to cpu_synchronize_state() */
1027 if (hvf_enabled()) {
1028 hvf_cpu_synchronize_state(cpu);
1033 void cpu_synchronize_all_post_reset(void)
1035 CPUState *cpu;
1037 CPU_FOREACH(cpu) {
1038 cpu_synchronize_post_reset(cpu);
1039 /* TODO: move to cpu_synchronize_post_reset() */
1040 if (hvf_enabled()) {
1041 hvf_cpu_synchronize_post_reset(cpu);
1046 void cpu_synchronize_all_post_init(void)
1048 CPUState *cpu;
1050 CPU_FOREACH(cpu) {
1051 cpu_synchronize_post_init(cpu);
1052 /* TODO: move to cpu_synchronize_post_init() */
1053 if (hvf_enabled()) {
1054 hvf_cpu_synchronize_post_init(cpu);
1059 void cpu_synchronize_all_pre_loadvm(void)
1061 CPUState *cpu;
1063 CPU_FOREACH(cpu) {
1064 cpu_synchronize_pre_loadvm(cpu);
1068 static int do_vm_stop(RunState state, bool send_stop)
1070 int ret = 0;
1072 if (runstate_is_running()) {
1073 cpu_disable_ticks();
1074 pause_all_vcpus();
1075 runstate_set(state);
1076 vm_state_notify(0, state);
1077 if (send_stop) {
1078 qapi_event_send_stop();
1082 bdrv_drain_all();
1083 replay_disable_events();
1084 ret = bdrv_flush_all();
1086 return ret;
1089 /* Special vm_stop() variant for terminating the process. Historically clients
1090 * did not expect a QMP STOP event and so we need to retain compatibility.
1092 int vm_shutdown(void)
1094 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
1097 static bool cpu_can_run(CPUState *cpu)
1099 if (cpu->stop) {
1100 return false;
1102 if (cpu_is_stopped(cpu)) {
1103 return false;
1105 return true;
1108 static void cpu_handle_guest_debug(CPUState *cpu)
1110 gdb_set_stop_cpu(cpu);
1111 qemu_system_debug_request();
1112 cpu->stopped = true;
1115 #ifdef CONFIG_LINUX
1116 static void sigbus_reraise(void)
1118 sigset_t set;
1119 struct sigaction action;
1121 memset(&action, 0, sizeof(action));
1122 action.sa_handler = SIG_DFL;
1123 if (!sigaction(SIGBUS, &action, NULL)) {
1124 raise(SIGBUS);
1125 sigemptyset(&set);
1126 sigaddset(&set, SIGBUS);
1127 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1129 perror("Failed to re-raise SIGBUS!\n");
1130 abort();
1133 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1135 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1136 sigbus_reraise();
1139 if (current_cpu) {
1140 /* Called asynchronously in VCPU thread. */
1141 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1142 sigbus_reraise();
1144 } else {
1145 /* Called synchronously (via signalfd) in main thread. */
1146 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1147 sigbus_reraise();
1152 static void qemu_init_sigbus(void)
1154 struct sigaction action;
1156 memset(&action, 0, sizeof(action));
1157 action.sa_flags = SA_SIGINFO;
1158 action.sa_sigaction = sigbus_handler;
1159 sigaction(SIGBUS, &action, NULL);
1161 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1163 #else /* !CONFIG_LINUX */
/* Non-Linux builds: no SIGBUS handling is set up. */
static void qemu_init_sigbus(void)
{
    /* nothing to do */
}
1167 #endif /* !CONFIG_LINUX */
1169 static QemuMutex qemu_global_mutex;
1171 static QemuThread io_thread;
1173 /* cpu creation */
1174 static QemuCond qemu_cpu_cond;
1175 /* system init */
1176 static QemuCond qemu_pause_cond;
1178 void qemu_init_cpu_loop(void)
1180 qemu_init_sigbus();
1181 qemu_cond_init(&qemu_cpu_cond);
1182 qemu_cond_init(&qemu_pause_cond);
1183 qemu_mutex_init(&qemu_global_mutex);
1185 qemu_thread_get_self(&io_thread);
1188 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1190 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1193 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1195 if (kvm_destroy_vcpu(cpu) < 0) {
1196 error_report("kvm_destroy_vcpu failed");
1197 exit(EXIT_FAILURE);
1201 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1205 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1207 g_assert(qemu_cpu_is_self(cpu));
1208 cpu->stop = false;
1209 cpu->stopped = true;
1210 if (exit) {
1211 cpu_exit(cpu);
1213 qemu_cond_broadcast(&qemu_pause_cond);
1216 static void qemu_wait_io_event_common(CPUState *cpu)
1218 atomic_mb_set(&cpu->thread_kicked, false);
1219 if (cpu->stop) {
1220 qemu_cpu_stop(cpu, false);
1222 process_queued_cpu_work(cpu);
1225 static void qemu_tcg_rr_wait_io_event(void)
1227 CPUState *cpu;
1229 while (all_cpu_threads_idle()) {
1230 stop_tcg_kick_timer();
1231 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1234 start_tcg_kick_timer();
1236 CPU_FOREACH(cpu) {
1237 qemu_wait_io_event_common(cpu);
1241 static void qemu_wait_io_event(CPUState *cpu)
1243 while (cpu_thread_is_idle(cpu)) {
1244 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1247 #ifdef _WIN32
1248 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1249 if (!tcg_enabled()) {
1250 SleepEx(0, TRUE);
1252 #endif
1253 qemu_wait_io_event_common(cpu);
1256 static void *qemu_kvm_cpu_thread_fn(void *arg)
1258 CPUState *cpu = arg;
1259 int r;
1261 rcu_register_thread();
1263 qemu_mutex_lock_iothread();
1264 qemu_thread_get_self(cpu->thread);
1265 cpu->thread_id = qemu_get_thread_id();
1266 cpu->can_do_io = 1;
1267 current_cpu = cpu;
1269 r = kvm_init_vcpu(cpu);
1270 if (r < 0) {
1271 error_report("kvm_init_vcpu failed: %s", strerror(-r));
1272 exit(1);
1275 kvm_init_cpu_signals(cpu);
1277 /* signal CPU creation */
1278 cpu->created = true;
1279 qemu_cond_signal(&qemu_cpu_cond);
1280 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1282 do {
1283 if (cpu_can_run(cpu)) {
1284 r = kvm_cpu_exec(cpu);
1285 if (r == EXCP_DEBUG) {
1286 cpu_handle_guest_debug(cpu);
1289 qemu_wait_io_event(cpu);
1290 } while (!cpu->unplug || cpu_can_run(cpu));
1292 qemu_kvm_destroy_vcpu(cpu);
1293 cpu->created = false;
1294 qemu_cond_signal(&qemu_cpu_cond);
1295 qemu_mutex_unlock_iothread();
1296 rcu_unregister_thread();
1297 return NULL;
1300 static void *qemu_dummy_cpu_thread_fn(void *arg)
1302 #ifdef _WIN32
1303 error_report("qtest is not supported under Windows");
1304 exit(1);
1305 #else
1306 CPUState *cpu = arg;
1307 sigset_t waitset;
1308 int r;
1310 rcu_register_thread();
1312 qemu_mutex_lock_iothread();
1313 qemu_thread_get_self(cpu->thread);
1314 cpu->thread_id = qemu_get_thread_id();
1315 cpu->can_do_io = 1;
1316 current_cpu = cpu;
1318 sigemptyset(&waitset);
1319 sigaddset(&waitset, SIG_IPI);
1321 /* signal CPU creation */
1322 cpu->created = true;
1323 qemu_cond_signal(&qemu_cpu_cond);
1324 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1326 do {
1327 qemu_mutex_unlock_iothread();
1328 do {
1329 int sig;
1330 r = sigwait(&waitset, &sig);
1331 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1332 if (r == -1) {
1333 perror("sigwait");
1334 exit(1);
1336 qemu_mutex_lock_iothread();
1337 qemu_wait_io_event(cpu);
1338 } while (!cpu->unplug);
1340 qemu_mutex_unlock_iothread();
1341 rcu_unregister_thread();
1342 return NULL;
1343 #endif
1346 static int64_t tcg_get_icount_limit(void)
1348 int64_t deadline;
1350 if (replay_mode != REPLAY_MODE_PLAY) {
1351 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1353 /* Maintain prior (possibly buggy) behaviour where if no deadline
1354 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1355 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1356 * nanoseconds.
1358 if ((deadline < 0) || (deadline > INT32_MAX)) {
1359 deadline = INT32_MAX;
1362 return qemu_icount_round(deadline);
1363 } else {
1364 return replay_get_instructions();
1368 static void handle_icount_deadline(void)
1370 assert(qemu_in_vcpu_thread());
1371 if (use_icount) {
1372 int64_t deadline =
1373 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1375 if (deadline == 0) {
1376 /* Wake up other AioContexts. */
1377 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1378 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1383 static void prepare_icount_for_run(CPUState *cpu)
1385 if (use_icount) {
1386 int insns_left;
1388 /* These should always be cleared by process_icount_data after
1389 * each vCPU execution. However u16.high can be raised
1390 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1392 g_assert(cpu->icount_decr.u16.low == 0);
1393 g_assert(cpu->icount_extra == 0);
1395 cpu->icount_budget = tcg_get_icount_limit();
1396 insns_left = MIN(0xffff, cpu->icount_budget);
1397 cpu->icount_decr.u16.low = insns_left;
1398 cpu->icount_extra = cpu->icount_budget - insns_left;
1400 replay_mutex_lock();
1404 static void process_icount_data(CPUState *cpu)
1406 if (use_icount) {
1407 /* Account for executed instructions */
1408 cpu_update_icount(cpu);
1410 /* Reset the counters */
1411 cpu->icount_decr.u16.low = 0;
1412 cpu->icount_extra = 0;
1413 cpu->icount_budget = 0;
1415 replay_account_executed_instructions();
1417 replay_mutex_unlock();
1422 static int tcg_cpu_exec(CPUState *cpu)
1424 int ret;
1425 #ifdef CONFIG_PROFILER
1426 int64_t ti;
1427 #endif
1429 assert(tcg_enabled());
1430 #ifdef CONFIG_PROFILER
1431 ti = profile_getclock();
1432 #endif
1433 cpu_exec_start(cpu);
1434 ret = cpu_exec(cpu);
1435 cpu_exec_end(cpu);
1436 #ifdef CONFIG_PROFILER
1437 atomic_set(&tcg_ctx->prof.cpu_exec_time,
1438 tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
1439 #endif
1440 return ret;
1443 /* Destroy any remaining vCPUs which have been unplugged and have
1444 * finished running
1446 static void deal_with_unplugged_cpus(void)
1448 CPUState *cpu;
1450 CPU_FOREACH(cpu) {
1451 if (cpu->unplug && !cpu_can_run(cpu)) {
1452 qemu_tcg_destroy_vcpu(cpu);
1453 cpu->created = false;
1454 qemu_cond_signal(&qemu_cpu_cond);
1455 break;
1460 /* Single-threaded TCG
1462 * In the single-threaded case each vCPU is simulated in turn. If
1463 * there is more than a single vCPU we create a simple timer to kick
1464 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1465 * This is done explicitly rather than relying on side-effects
1466 * elsewhere.
1469 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1471 CPUState *cpu = arg;
1473 assert(tcg_enabled());
1474 rcu_register_thread();
1475 tcg_register_thread();
1477 qemu_mutex_lock_iothread();
1478 qemu_thread_get_self(cpu->thread);
1480 cpu->thread_id = qemu_get_thread_id();
1481 cpu->created = true;
1482 cpu->can_do_io = 1;
1483 qemu_cond_signal(&qemu_cpu_cond);
1484 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1486 /* wait for initial kick-off after machine start */
1487 while (first_cpu->stopped) {
1488 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1490 /* process any pending work */
1491 CPU_FOREACH(cpu) {
1492 current_cpu = cpu;
1493 qemu_wait_io_event_common(cpu);
1497 start_tcg_kick_timer();
1499 cpu = first_cpu;
1501 /* process any pending work */
1502 cpu->exit_request = 1;
1504 while (1) {
1505 qemu_mutex_unlock_iothread();
1506 replay_mutex_lock();
1507 qemu_mutex_lock_iothread();
1508 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1509 qemu_account_warp_timer();
1511 /* Run the timers here. This is much more efficient than
1512 * waking up the I/O thread and waiting for completion.
1514 handle_icount_deadline();
1516 replay_mutex_unlock();
1518 if (!cpu) {
1519 cpu = first_cpu;
1522 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1524 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1525 current_cpu = cpu;
1527 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1528 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1530 if (cpu_can_run(cpu)) {
1531 int r;
1533 qemu_mutex_unlock_iothread();
1534 prepare_icount_for_run(cpu);
1536 r = tcg_cpu_exec(cpu);
1538 process_icount_data(cpu);
1539 qemu_mutex_lock_iothread();
1541 if (r == EXCP_DEBUG) {
1542 cpu_handle_guest_debug(cpu);
1543 break;
1544 } else if (r == EXCP_ATOMIC) {
1545 qemu_mutex_unlock_iothread();
1546 cpu_exec_step_atomic(cpu);
1547 qemu_mutex_lock_iothread();
1548 break;
1550 } else if (cpu->stop) {
1551 if (cpu->unplug) {
1552 cpu = CPU_NEXT(cpu);
1554 break;
1557 cpu = CPU_NEXT(cpu);
1558 } /* while (cpu && !cpu->exit_request).. */
1560 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1561 atomic_set(&tcg_current_rr_cpu, NULL);
1563 if (cpu && cpu->exit_request) {
1564 atomic_mb_set(&cpu->exit_request, 0);
1567 if (use_icount && all_cpu_threads_idle()) {
1569 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1570 * in the main_loop, wake it up in order to start the warp timer.
1572 qemu_notify_event();
1575 qemu_tcg_rr_wait_io_event();
1576 deal_with_unplugged_cpus();
1579 rcu_unregister_thread();
1580 return NULL;
1583 static void *qemu_hax_cpu_thread_fn(void *arg)
1585 CPUState *cpu = arg;
1586 int r;
1588 rcu_register_thread();
1589 qemu_mutex_lock_iothread();
1590 qemu_thread_get_self(cpu->thread);
1592 cpu->thread_id = qemu_get_thread_id();
1593 cpu->created = true;
1594 cpu->halted = 0;
1595 current_cpu = cpu;
1597 hax_init_vcpu(cpu);
1598 qemu_cond_signal(&qemu_cpu_cond);
1599 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1601 do {
1602 if (cpu_can_run(cpu)) {
1603 r = hax_smp_cpu_exec(cpu);
1604 if (r == EXCP_DEBUG) {
1605 cpu_handle_guest_debug(cpu);
1609 qemu_wait_io_event(cpu);
1610 } while (!cpu->unplug || cpu_can_run(cpu));
1611 rcu_unregister_thread();
1612 return NULL;
1615 /* The HVF-specific vCPU thread function. This one should only run when the host
1616 * CPU supports the VMX "unrestricted guest" feature. */
1617 static void *qemu_hvf_cpu_thread_fn(void *arg)
1619 CPUState *cpu = arg;
1621 int r;
1623 assert(hvf_enabled());
1625 rcu_register_thread();
1627 qemu_mutex_lock_iothread();
1628 qemu_thread_get_self(cpu->thread);
1630 cpu->thread_id = qemu_get_thread_id();
1631 cpu->can_do_io = 1;
1632 current_cpu = cpu;
1634 hvf_init_vcpu(cpu);
1636 /* signal CPU creation */
1637 cpu->created = true;
1638 qemu_cond_signal(&qemu_cpu_cond);
1639 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1641 do {
1642 if (cpu_can_run(cpu)) {
1643 r = hvf_vcpu_exec(cpu);
1644 if (r == EXCP_DEBUG) {
1645 cpu_handle_guest_debug(cpu);
1648 qemu_wait_io_event(cpu);
1649 } while (!cpu->unplug || cpu_can_run(cpu));
1651 hvf_vcpu_destroy(cpu);
1652 cpu->created = false;
1653 qemu_cond_signal(&qemu_cpu_cond);
1654 qemu_mutex_unlock_iothread();
1655 rcu_unregister_thread();
1656 return NULL;
1659 static void *qemu_whpx_cpu_thread_fn(void *arg)
1661 CPUState *cpu = arg;
1662 int r;
1664 rcu_register_thread();
1666 qemu_mutex_lock_iothread();
1667 qemu_thread_get_self(cpu->thread);
1668 cpu->thread_id = qemu_get_thread_id();
1669 current_cpu = cpu;
1671 r = whpx_init_vcpu(cpu);
1672 if (r < 0) {
1673 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1674 exit(1);
1677 /* signal CPU creation */
1678 cpu->created = true;
1679 qemu_cond_signal(&qemu_cpu_cond);
1680 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1682 do {
1683 if (cpu_can_run(cpu)) {
1684 r = whpx_vcpu_exec(cpu);
1685 if (r == EXCP_DEBUG) {
1686 cpu_handle_guest_debug(cpu);
1689 while (cpu_thread_is_idle(cpu)) {
1690 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1692 qemu_wait_io_event_common(cpu);
1693 } while (!cpu->unplug || cpu_can_run(cpu));
1695 whpx_destroy_vcpu(cpu);
1696 cpu->created = false;
1697 qemu_cond_signal(&qemu_cpu_cond);
1698 qemu_mutex_unlock_iothread();
1699 rcu_unregister_thread();
1700 return NULL;
#ifdef _WIN32
/*
 * Empty APC routine.  qemu_cpu_kick_thread() queues it to the target
 * thread with QueueUserAPC(); the routine itself does nothing.
 */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif
1709 /* Multi-threaded TCG
1711 * In the multi-threaded case each vCPU has its own thread. The TLS
1712 * variable current_cpu can be used deep in the code to find the
1713 * current CPUState for a given thread.
1716 static void *qemu_tcg_cpu_thread_fn(void *arg)
1718 CPUState *cpu = arg;
1720 assert(tcg_enabled());
1721 g_assert(!use_icount);
1723 rcu_register_thread();
1724 tcg_register_thread();
1726 qemu_mutex_lock_iothread();
1727 qemu_thread_get_self(cpu->thread);
1729 cpu->thread_id = qemu_get_thread_id();
1730 cpu->created = true;
1731 cpu->can_do_io = 1;
1732 current_cpu = cpu;
1733 qemu_cond_signal(&qemu_cpu_cond);
1734 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1736 /* process any pending work */
1737 cpu->exit_request = 1;
1739 do {
1740 if (cpu_can_run(cpu)) {
1741 int r;
1742 qemu_mutex_unlock_iothread();
1743 r = tcg_cpu_exec(cpu);
1744 qemu_mutex_lock_iothread();
1745 switch (r) {
1746 case EXCP_DEBUG:
1747 cpu_handle_guest_debug(cpu);
1748 break;
1749 case EXCP_HALTED:
1750 /* during start-up the vCPU is reset and the thread is
1751 * kicked several times. If we don't ensure we go back
1752 * to sleep in the halted state we won't cleanly
1753 * start-up when the vCPU is enabled.
1755 * cpu->halted should ensure we sleep in wait_io_event
1757 g_assert(cpu->halted);
1758 break;
1759 case EXCP_ATOMIC:
1760 qemu_mutex_unlock_iothread();
1761 cpu_exec_step_atomic(cpu);
1762 qemu_mutex_lock_iothread();
1763 default:
1764 /* Ignore everything else? */
1765 break;
1769 atomic_mb_set(&cpu->exit_request, 0);
1770 qemu_wait_io_event(cpu);
1771 } while (!cpu->unplug || cpu_can_run(cpu));
1773 qemu_tcg_destroy_vcpu(cpu);
1774 cpu->created = false;
1775 qemu_cond_signal(&qemu_cpu_cond);
1776 qemu_mutex_unlock_iothread();
1777 rcu_unregister_thread();
1778 return NULL;
1781 static void qemu_cpu_kick_thread(CPUState *cpu)
1783 #ifndef _WIN32
1784 int err;
1786 if (cpu->thread_kicked) {
1787 return;
1789 cpu->thread_kicked = true;
1790 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1791 if (err && err != ESRCH) {
1792 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1793 exit(1);
1795 #else /* _WIN32 */
1796 if (!qemu_cpu_is_self(cpu)) {
1797 if (whpx_enabled()) {
1798 whpx_vcpu_kick(cpu);
1799 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1800 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1801 __func__, GetLastError());
1802 exit(1);
1805 #endif
1808 void qemu_cpu_kick(CPUState *cpu)
1810 qemu_cond_broadcast(cpu->halt_cond);
1811 if (tcg_enabled()) {
1812 cpu_exit(cpu);
1813 /* NOP unless doing single-thread RR */
1814 qemu_cpu_kick_rr_cpu();
1815 } else {
1816 if (hax_enabled()) {
1818 * FIXME: race condition with the exit_request check in
1819 * hax_vcpu_hax_exec
1821 cpu->exit_request = 1;
1823 qemu_cpu_kick_thread(cpu);
1827 void qemu_cpu_kick_self(void)
1829 assert(current_cpu);
1830 qemu_cpu_kick_thread(current_cpu);
1833 bool qemu_cpu_is_self(CPUState *cpu)
1835 return qemu_thread_is_self(cpu->thread);
1838 bool qemu_in_vcpu_thread(void)
1840 return current_cpu && qemu_cpu_is_self(current_cpu);
/* Per-thread flag: true while this thread holds the iothread mutex. */
static __thread bool iothread_locked = false;

/* Report whether the calling thread currently holds the iothread mutex. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1851 * The BQL is taken from so many places that it is worth profiling the
1852 * callers directly, instead of funneling them all through a single function.
1854 void qemu_mutex_lock_iothread_impl(const char *file, int line)
1856 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1858 g_assert(!qemu_mutex_iothread_locked());
1859 bql_lock(&qemu_global_mutex, file, line);
1860 iothread_locked = true;
1863 void qemu_mutex_unlock_iothread(void)
1865 g_assert(qemu_mutex_iothread_locked());
1866 iothread_locked = false;
1867 qemu_mutex_unlock(&qemu_global_mutex);
1870 static bool all_vcpus_paused(void)
1872 CPUState *cpu;
1874 CPU_FOREACH(cpu) {
1875 if (!cpu->stopped) {
1876 return false;
1880 return true;
1883 void pause_all_vcpus(void)
1885 CPUState *cpu;
1887 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1888 CPU_FOREACH(cpu) {
1889 if (qemu_cpu_is_self(cpu)) {
1890 qemu_cpu_stop(cpu, true);
1891 } else {
1892 cpu->stop = true;
1893 qemu_cpu_kick(cpu);
1897 /* We need to drop the replay_lock so any vCPU threads woken up
1898 * can finish their replay tasks
1900 replay_mutex_unlock();
1902 while (!all_vcpus_paused()) {
1903 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1904 CPU_FOREACH(cpu) {
1905 qemu_cpu_kick(cpu);
1909 qemu_mutex_unlock_iothread();
1910 replay_mutex_lock();
1911 qemu_mutex_lock_iothread();
1914 void cpu_resume(CPUState *cpu)
1916 cpu->stop = false;
1917 cpu->stopped = false;
1918 qemu_cpu_kick(cpu);
1921 void resume_all_vcpus(void)
1923 CPUState *cpu;
1925 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1926 CPU_FOREACH(cpu) {
1927 cpu_resume(cpu);
1931 void cpu_remove_sync(CPUState *cpu)
1933 cpu->stop = true;
1934 cpu->unplug = true;
1935 qemu_cpu_kick(cpu);
1936 qemu_mutex_unlock_iothread();
1937 qemu_thread_join(cpu->thread);
1938 qemu_mutex_lock_iothread();
1941 /* For temporary buffers for forming a name */
1942 #define VCPU_THREAD_NAME_SIZE 16
1944 static void qemu_tcg_init_vcpu(CPUState *cpu)
1946 char thread_name[VCPU_THREAD_NAME_SIZE];
1947 static QemuCond *single_tcg_halt_cond;
1948 static QemuThread *single_tcg_cpu_thread;
1949 static int tcg_region_inited;
1951 assert(tcg_enabled());
1953 * Initialize TCG regions--once. Now is a good time, because:
1954 * (1) TCG's init context, prologue and target globals have been set up.
1955 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1956 * -accel flag is processed, so the check doesn't work then).
1958 if (!tcg_region_inited) {
1959 tcg_region_inited = 1;
1960 tcg_region_init();
1963 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1964 cpu->thread = g_malloc0(sizeof(QemuThread));
1965 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1966 qemu_cond_init(cpu->halt_cond);
1968 if (qemu_tcg_mttcg_enabled()) {
1969 /* create a thread per vCPU with TCG (MTTCG) */
1970 parallel_cpus = true;
1971 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1972 cpu->cpu_index);
1974 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1975 cpu, QEMU_THREAD_JOINABLE);
1977 } else {
1978 /* share a single thread for all cpus with TCG */
1979 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1980 qemu_thread_create(cpu->thread, thread_name,
1981 qemu_tcg_rr_cpu_thread_fn,
1982 cpu, QEMU_THREAD_JOINABLE);
1984 single_tcg_halt_cond = cpu->halt_cond;
1985 single_tcg_cpu_thread = cpu->thread;
1987 #ifdef _WIN32
1988 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1989 #endif
1990 } else {
1991 /* For non-MTTCG cases we share the thread */
1992 cpu->thread = single_tcg_cpu_thread;
1993 cpu->halt_cond = single_tcg_halt_cond;
1994 cpu->thread_id = first_cpu->thread_id;
1995 cpu->can_do_io = 1;
1996 cpu->created = true;
2000 static void qemu_hax_start_vcpu(CPUState *cpu)
2002 char thread_name[VCPU_THREAD_NAME_SIZE];
2004 cpu->thread = g_malloc0(sizeof(QemuThread));
2005 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2006 qemu_cond_init(cpu->halt_cond);
2008 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
2009 cpu->cpu_index);
2010 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
2011 cpu, QEMU_THREAD_JOINABLE);
2012 #ifdef _WIN32
2013 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2014 #endif
2017 static void qemu_kvm_start_vcpu(CPUState *cpu)
2019 char thread_name[VCPU_THREAD_NAME_SIZE];
2021 cpu->thread = g_malloc0(sizeof(QemuThread));
2022 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2023 qemu_cond_init(cpu->halt_cond);
2024 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2025 cpu->cpu_index);
2026 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
2027 cpu, QEMU_THREAD_JOINABLE);
2030 static void qemu_hvf_start_vcpu(CPUState *cpu)
2032 char thread_name[VCPU_THREAD_NAME_SIZE];
2034 /* HVF currently does not support TCG, and only runs in
2035 * unrestricted-guest mode. */
2036 assert(hvf_enabled());
2038 cpu->thread = g_malloc0(sizeof(QemuThread));
2039 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2040 qemu_cond_init(cpu->halt_cond);
2042 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2043 cpu->cpu_index);
2044 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2045 cpu, QEMU_THREAD_JOINABLE);
2048 static void qemu_whpx_start_vcpu(CPUState *cpu)
2050 char thread_name[VCPU_THREAD_NAME_SIZE];
2052 cpu->thread = g_malloc0(sizeof(QemuThread));
2053 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2054 qemu_cond_init(cpu->halt_cond);
2055 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2056 cpu->cpu_index);
2057 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2058 cpu, QEMU_THREAD_JOINABLE);
2059 #ifdef _WIN32
2060 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2061 #endif
2064 static void qemu_dummy_start_vcpu(CPUState *cpu)
2066 char thread_name[VCPU_THREAD_NAME_SIZE];
2068 cpu->thread = g_malloc0(sizeof(QemuThread));
2069 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2070 qemu_cond_init(cpu->halt_cond);
2071 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2072 cpu->cpu_index);
2073 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2074 QEMU_THREAD_JOINABLE);
2077 void qemu_init_vcpu(CPUState *cpu)
2079 cpu->nr_cores = smp_cores;
2080 cpu->nr_threads = smp_threads;
2081 cpu->stopped = true;
2082 cpu->random_seed = qemu_guest_random_seed_thread_part1();
2084 if (!cpu->as) {
2085 /* If the target cpu hasn't set up any address spaces itself,
2086 * give it the default one.
2088 cpu->num_ases = 1;
2089 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2092 if (kvm_enabled()) {
2093 qemu_kvm_start_vcpu(cpu);
2094 } else if (hax_enabled()) {
2095 qemu_hax_start_vcpu(cpu);
2096 } else if (hvf_enabled()) {
2097 qemu_hvf_start_vcpu(cpu);
2098 } else if (tcg_enabled()) {
2099 qemu_tcg_init_vcpu(cpu);
2100 } else if (whpx_enabled()) {
2101 qemu_whpx_start_vcpu(cpu);
2102 } else {
2103 qemu_dummy_start_vcpu(cpu);
2106 while (!cpu->created) {
2107 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2111 void cpu_stop_current(void)
2113 if (current_cpu) {
2114 current_cpu->stop = true;
2115 cpu_exit(current_cpu);
2119 int vm_stop(RunState state)
2121 if (qemu_in_vcpu_thread()) {
2122 qemu_system_vmstop_request_prepare();
2123 qemu_system_vmstop_request(state);
2125 * FIXME: should not return to device code in case
2126 * vm_stop() has been requested.
2128 cpu_stop_current();
2129 return 0;
2132 return do_vm_stop(state, true);
2136 * Prepare for (re)starting the VM.
2137 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2138 * running or in case of an error condition), 0 otherwise.
2140 int vm_prepare_start(void)
2142 RunState requested;
2144 qemu_vmstop_requested(&requested);
2145 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2146 return -1;
2149 /* Ensure that a STOP/RESUME pair of events is emitted if a
2150 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2151 * example, according to documentation is always followed by
2152 * the STOP event.
2154 if (runstate_is_running()) {
2155 qapi_event_send_stop();
2156 qapi_event_send_resume();
2157 return -1;
2160 /* We are sending this now, but the CPUs will be resumed shortly later */
2161 qapi_event_send_resume();
2163 replay_enable_events();
2164 cpu_enable_ticks();
2165 runstate_set(RUN_STATE_RUNNING);
2166 vm_state_notify(1, RUN_STATE_RUNNING);
2167 return 0;
/* Start the VM: resume all vCPUs when vm_prepare_start() returns 0. */
void vm_start(void)
{
    if (vm_prepare_start() != 0) {
        return;
    }

    resume_all_vcpus();
}
2177 /* does a state transition even if the VM is already stopped,
2178 current state is forgotten forever */
2179 int vm_stop_force_state(RunState state)
2181 if (runstate_is_running()) {
2182 return vm_stop(state);
2183 } else {
2184 runstate_set(state);
2186 bdrv_drain_all();
2187 /* Make sure to return an error if the flush in a previous vm_stop()
2188 * failed. */
2189 return bdrv_flush_all();
/*
 * Print the list of CPU models through the target's cpu_list macro, when
 * the target defines one.  @optarg is unused.
 */
void list_cpus(const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#ifdef cpu_list
    cpu_list();
#endif
}
2201 CpuInfoList *qmp_query_cpus(Error **errp)
2203 MachineState *ms = MACHINE(qdev_get_machine());
2204 MachineClass *mc = MACHINE_GET_CLASS(ms);
2205 CpuInfoList *head = NULL, *cur_item = NULL;
2206 CPUState *cpu;
2208 CPU_FOREACH(cpu) {
2209 CpuInfoList *info;
2210 #if defined(TARGET_I386)
2211 X86CPU *x86_cpu = X86_CPU(cpu);
2212 CPUX86State *env = &x86_cpu->env;
2213 #elif defined(TARGET_PPC)
2214 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
2215 CPUPPCState *env = &ppc_cpu->env;
2216 #elif defined(TARGET_SPARC)
2217 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
2218 CPUSPARCState *env = &sparc_cpu->env;
2219 #elif defined(TARGET_RISCV)
2220 RISCVCPU *riscv_cpu = RISCV_CPU(cpu);
2221 CPURISCVState *env = &riscv_cpu->env;
2222 #elif defined(TARGET_MIPS)
2223 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
2224 CPUMIPSState *env = &mips_cpu->env;
2225 #elif defined(TARGET_TRICORE)
2226 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
2227 CPUTriCoreState *env = &tricore_cpu->env;
2228 #elif defined(TARGET_S390X)
2229 S390CPU *s390_cpu = S390_CPU(cpu);
2230 CPUS390XState *env = &s390_cpu->env;
2231 #endif
2233 cpu_synchronize_state(cpu);
2235 info = g_malloc0(sizeof(*info));
2236 info->value = g_malloc0(sizeof(*info->value));
2237 info->value->CPU = cpu->cpu_index;
2238 info->value->current = (cpu == first_cpu);
2239 info->value->halted = cpu->halted;
2240 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2241 info->value->thread_id = cpu->thread_id;
2242 #if defined(TARGET_I386)
2243 info->value->arch = CPU_INFO_ARCH_X86;
2244 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
2245 #elif defined(TARGET_PPC)
2246 info->value->arch = CPU_INFO_ARCH_PPC;
2247 info->value->u.ppc.nip = env->nip;
2248 #elif defined(TARGET_SPARC)
2249 info->value->arch = CPU_INFO_ARCH_SPARC;
2250 info->value->u.q_sparc.pc = env->pc;
2251 info->value->u.q_sparc.npc = env->npc;
2252 #elif defined(TARGET_MIPS)
2253 info->value->arch = CPU_INFO_ARCH_MIPS;
2254 info->value->u.q_mips.PC = env->active_tc.PC;
2255 #elif defined(TARGET_TRICORE)
2256 info->value->arch = CPU_INFO_ARCH_TRICORE;
2257 info->value->u.tricore.PC = env->PC;
2258 #elif defined(TARGET_S390X)
2259 info->value->arch = CPU_INFO_ARCH_S390;
2260 info->value->u.s390.cpu_state = env->cpu_state;
2261 #elif defined(TARGET_RISCV)
2262 info->value->arch = CPU_INFO_ARCH_RISCV;
2263 info->value->u.riscv.pc = env->pc;
2264 #else
2265 info->value->arch = CPU_INFO_ARCH_OTHER;
2266 #endif
2267 info->value->has_props = !!mc->cpu_index_to_instance_props;
2268 if (info->value->has_props) {
2269 CpuInstanceProperties *props;
2270 props = g_malloc0(sizeof(*props));
2271 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2272 info->value->props = props;
2275 /* XXX: waiting for the qapi to support GSList */
2276 if (!cur_item) {
2277 head = cur_item = info;
2278 } else {
2279 cur_item->next = info;
2280 cur_item = info;
2284 return head;
2287 static CpuInfoArch sysemu_target_to_cpuinfo_arch(SysEmuTarget target)
2290 * The @SysEmuTarget -> @CpuInfoArch mapping below is based on the
2291 * TARGET_ARCH -> TARGET_BASE_ARCH mapping in the "configure" script.
2293 switch (target) {
2294 case SYS_EMU_TARGET_I386:
2295 case SYS_EMU_TARGET_X86_64:
2296 return CPU_INFO_ARCH_X86;
2298 case SYS_EMU_TARGET_PPC:
2299 case SYS_EMU_TARGET_PPC64:
2300 return CPU_INFO_ARCH_PPC;
2302 case SYS_EMU_TARGET_SPARC:
2303 case SYS_EMU_TARGET_SPARC64:
2304 return CPU_INFO_ARCH_SPARC;
2306 case SYS_EMU_TARGET_MIPS:
2307 case SYS_EMU_TARGET_MIPSEL:
2308 case SYS_EMU_TARGET_MIPS64:
2309 case SYS_EMU_TARGET_MIPS64EL:
2310 return CPU_INFO_ARCH_MIPS;
2312 case SYS_EMU_TARGET_TRICORE:
2313 return CPU_INFO_ARCH_TRICORE;
2315 case SYS_EMU_TARGET_S390X:
2316 return CPU_INFO_ARCH_S390;
2318 case SYS_EMU_TARGET_RISCV32:
2319 case SYS_EMU_TARGET_RISCV64:
2320 return CPU_INFO_ARCH_RISCV;
2322 default:
2323 return CPU_INFO_ARCH_OTHER;
2327 static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu)
2329 #ifdef TARGET_S390X
2330 S390CPU *s390_cpu = S390_CPU(cpu);
2331 CPUS390XState *env = &s390_cpu->env;
2333 info->cpu_state = env->cpu_state;
2334 #else
2335 abort();
2336 #endif
2340 * fast means: we NEVER interrupt vCPU threads to retrieve
2341 * information from KVM.
2343 CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
2345 MachineState *ms = MACHINE(qdev_get_machine());
2346 MachineClass *mc = MACHINE_GET_CLASS(ms);
2347 CpuInfoFastList *head = NULL, *cur_item = NULL;
2348 SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, TARGET_NAME,
2349 -1, &error_abort);
2350 CPUState *cpu;
2352 CPU_FOREACH(cpu) {
2353 CpuInfoFastList *info = g_malloc0(sizeof(*info));
2354 info->value = g_malloc0(sizeof(*info->value));
2356 info->value->cpu_index = cpu->cpu_index;
2357 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2358 info->value->thread_id = cpu->thread_id;
2360 info->value->has_props = !!mc->cpu_index_to_instance_props;
2361 if (info->value->has_props) {
2362 CpuInstanceProperties *props;
2363 props = g_malloc0(sizeof(*props));
2364 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2365 info->value->props = props;
2368 info->value->arch = sysemu_target_to_cpuinfo_arch(target);
2369 info->value->target = target;
2370 if (target == SYS_EMU_TARGET_S390X) {
2371 cpustate_to_cpuinfo_s390(&info->value->u.s390x, cpu);
2374 if (!cur_item) {
2375 head = cur_item = info;
2376 } else {
2377 cur_item->next = info;
2378 cur_item = info;
2382 return head;
2385 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2386 bool has_cpu, int64_t cpu_index, Error **errp)
2388 FILE *f;
2389 uint32_t l;
2390 CPUState *cpu;
2391 uint8_t buf[1024];
2392 int64_t orig_addr = addr, orig_size = size;
2394 if (!has_cpu) {
2395 cpu_index = 0;
2398 cpu = qemu_get_cpu(cpu_index);
2399 if (cpu == NULL) {
2400 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2401 "a CPU number");
2402 return;
2405 f = fopen(filename, "wb");
2406 if (!f) {
2407 error_setg_file_open(errp, errno, filename);
2408 return;
2411 while (size != 0) {
2412 l = sizeof(buf);
2413 if (l > size)
2414 l = size;
2415 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2416 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2417 " specified", orig_addr, orig_size);
2418 goto exit;
2420 if (fwrite(buf, 1, l, f) != l) {
2421 error_setg(errp, QERR_IO_ERROR);
2422 goto exit;
2424 addr += l;
2425 size -= l;
2428 exit:
2429 fclose(f);
2432 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2433 Error **errp)
2435 FILE *f;
2436 uint32_t l;
2437 uint8_t buf[1024];
2439 f = fopen(filename, "wb");
2440 if (!f) {
2441 error_setg_file_open(errp, errno, filename);
2442 return;
2445 while (size != 0) {
2446 l = sizeof(buf);
2447 if (l > size)
2448 l = size;
2449 cpu_physical_memory_read(addr, buf, l);
2450 if (fwrite(buf, 1, l, f) != l) {
2451 error_setg(errp, QERR_IO_ERROR);
2452 goto exit;
2454 addr += l;
2455 size -= l;
2458 exit:
2459 fclose(f);
2462 void qmp_inject_nmi(Error **errp)
2464 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2467 void dump_drift_info(void)
2469 if (!use_icount) {
2470 return;
2473 qemu_printf("Host - Guest clock %"PRIi64" ms\n",
2474 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2475 if (icount_align_option) {
2476 qemu_printf("Max guest delay %"PRIi64" ms\n",
2477 -max_delay / SCALE_MS);
2478 qemu_printf("Max guest advance %"PRIi64" ms\n",
2479 max_advance / SCALE_MS);
2480 } else {
2481 qemu_printf("Max guest delay NA\n");
2482 qemu_printf("Max guest advance NA\n");