cpus: take seqlock across qemu_icount updates
[qemu/ericb.git] / cpus.c
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "qemu/osdep.h"
26 #include "qemu/config-file.h"
27 #include "cpu.h"
28 #include "monitor/monitor.h"
29 #include "qapi/error.h"
30 #include "qapi/qapi-commands-misc.h"
31 #include "qapi/qapi-events-run-state.h"
32 #include "qapi/qmp/qerror.h"
33 #include "qemu/error-report.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/block-backend.h"
36 #include "exec/gdbstub.h"
37 #include "sysemu/dma.h"
38 #include "sysemu/hw_accel.h"
39 #include "sysemu/kvm.h"
40 #include "sysemu/hax.h"
41 #include "sysemu/hvf.h"
42 #include "sysemu/whpx.h"
43 #include "exec/exec-all.h"
45 #include "qemu/thread.h"
46 #include "sysemu/cpus.h"
47 #include "sysemu/qtest.h"
48 #include "qemu/main-loop.h"
49 #include "qemu/option.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/seqlock.h"
52 #include "tcg.h"
53 #include "hw/nmi.h"
54 #include "sysemu/replay.h"
55 #include "hw/boards.h"
57 #ifdef CONFIG_LINUX
59 #include <sys/prctl.h>
61 #ifndef PR_MCE_KILL
62 #define PR_MCE_KILL 33
63 #endif
65 #ifndef PR_MCE_KILL_SET
66 #define PR_MCE_KILL_SET 1
67 #endif
69 #ifndef PR_MCE_KILL_EARLY
70 #define PR_MCE_KILL_EARLY 1
71 #endif
73 #endif /* CONFIG_LINUX */
75 int64_t max_delay;
76 int64_t max_advance;
78 /* vcpu throttling controls */
79 static QEMUTimer *throttle_timer;
80 static unsigned int throttle_percentage;
82 #define CPU_THROTTLE_PCT_MIN 1
83 #define CPU_THROTTLE_PCT_MAX 99
84 #define CPU_THROTTLE_TIMESLICE_NS 10000000
86 bool cpu_is_stopped(CPUState *cpu)
88 return cpu->stopped || !runstate_is_running();
91 static bool cpu_thread_is_idle(CPUState *cpu)
93 if (cpu->stop || cpu->queued_work_first) {
94 return false;
96 if (cpu_is_stopped(cpu)) {
97 return true;
99 if (!cpu->halted || cpu_has_work(cpu) ||
100 kvm_halt_in_kernel()) {
101 return false;
103 return true;
106 static bool all_cpu_threads_idle(void)
108 CPUState *cpu;
110 CPU_FOREACH(cpu) {
111 if (!cpu_thread_is_idle(cpu)) {
112 return false;
115 return true;
118 /***********************************************************/
119 /* guest cycle counter */
121 /* Protected by TimersState seqlock */
123 static bool icount_sleep = true;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
125 #define MAX_ICOUNT_SHIFT 10
127 typedef struct TimersState {
128 /* Protected by BQL. */
129 int64_t cpu_ticks_prev;
130 int64_t cpu_ticks_offset;
132 /* Protect fields that can be read outside the BQL (via the seqlock) and
133 * that may be written from multiple threads (via the spin lock).
135 QemuSeqLock vm_clock_seqlock;
136 QemuSpin vm_clock_lock;
138 int16_t cpu_ticks_enabled;
140 /* Conversion factor from emulated instructions to virtual clock ticks. */
141 int16_t icount_time_shift;
143 /* Compensate for varying guest execution speed. */
144 int64_t qemu_icount_bias;
146 int64_t vm_clock_warp_start;
147 int64_t cpu_clock_offset;
149 /* Only written by TCG thread */
150 int64_t qemu_icount;
152 /* for adjusting icount */
153 QEMUTimer *icount_rt_timer;
154 QEMUTimer *icount_vm_timer;
155 QEMUTimer *icount_warp_timer;
156 } TimersState;
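/* Rough locking sketch for the fields above: writers go through
 * seqlock_write_lock(&timers_state.vm_clock_seqlock,
 *                    &timers_state.vm_clock_lock),
 * so the spin lock serializes concurrent writers while the seqlock
 * sequence count tells lock-free readers to retry; readers loop on
 * seqlock_read_begin()/seqlock_read_retry() as in cpu_get_icount()
 * below.
 */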
158 static TimersState timers_state;
159 bool mttcg_enabled;
162 * We default to false if we know other options have been enabled
163 * which are currently incompatible with MTTCG. Otherwise when each
164 * guest (target) has been updated to support:
165 * - atomic instructions
166 * - memory ordering primitives (barriers)
167 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
169 * Once a guest architecture has been converted to the new primitives
170 * there are two remaining limitations to check.
172 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
173 * - The host must have a stronger memory order than the guest
175 * It may be possible in future to support strong guests on weak hosts
176 * but that will require tagging all load/stores in a guest with their
177 * implicit memory order requirements which would likely slow things
178 * down a lot.
181 static bool check_tcg_memory_orders_compatible(void)
183 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
184 return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
185 #else
186 return false;
187 #endif
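/* In other words, the orderings the guest requires must be a subset of
 * what the host TCG backend guarantees. With illustrative flag sets only:
 * a guest needing {LD_LD, ST_ST} on a host providing {LD_LD, LD_ST, ST_ST}
 * gives guest & ~host == 0, so the check passes and MTTCG may be allowed.
 */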
190 static bool default_mttcg_enabled(void)
192 if (use_icount || TCG_OVERSIZED_GUEST) {
193 return false;
194 } else {
195 #ifdef TARGET_SUPPORTS_MTTCG
196 return check_tcg_memory_orders_compatible();
197 #else
198 return false;
199 #endif
203 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
205 const char *t = qemu_opt_get(opts, "thread");
206 if (t) {
207 if (strcmp(t, "multi") == 0) {
208 if (TCG_OVERSIZED_GUEST) {
209 error_setg(errp, "No MTTCG when guest word size > host's");
210 } else if (use_icount) {
211 error_setg(errp, "No MTTCG when icount is enabled");
212 } else {
213 #ifndef TARGET_SUPPORTS_MTTCG
214 error_report("Guest not yet converted to MTTCG - "
215 "you may get unexpected results");
216 #endif
217 if (!check_tcg_memory_orders_compatible()) {
218 error_report("Guest expects a stronger memory ordering "
219 "than the host provides");
220 error_printf("This may cause strange/hard to debug errors\n");
222 mttcg_enabled = true;
224 } else if (strcmp(t, "single") == 0) {
225 mttcg_enabled = false;
226 } else {
227 error_setg(errp, "Invalid 'thread' setting %s", t);
229 } else {
230 mttcg_enabled = default_mttcg_enabled();
234 /* The current number of executed instructions is based on what we
235 * originally budgeted minus the current state of the decrementing
236 * icount counters in extra/u16.low.
238 static int64_t cpu_get_icount_executed(CPUState *cpu)
240 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
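/* Hypothetical numbers: with icount_budget = 1000, u16.low = 200 and
 * icount_extra = 300, there are still 500 instructions of budget left,
 * so 1000 - (200 + 300) = 500 instructions have been executed so far.
 */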
244 * Update the global shared timer_state.qemu_icount to take into
245 * account executed instructions. This is done by the TCG vCPU
246 * thread so the main-loop can see time has moved forward.
248 static void cpu_update_icount_locked(CPUState *cpu)
250 int64_t executed = cpu_get_icount_executed(cpu);
251 cpu->icount_budget -= executed;
253 atomic_set__nocheck(&timers_state.qemu_icount,
254 timers_state.qemu_icount + executed);
258 * Update the global shared timer_state.qemu_icount to take into
259 * account executed instructions. This is done by the TCG vCPU
260 * thread so the main-loop can see time has moved forward.
262 void cpu_update_icount(CPUState *cpu)
264 seqlock_write_lock(&timers_state.vm_clock_seqlock,
265 &timers_state.vm_clock_lock);
266 cpu_update_icount_locked(cpu);
267 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
268 &timers_state.vm_clock_lock);
271 static int64_t cpu_get_icount_raw_locked(void)
273 CPUState *cpu = current_cpu;
275 if (cpu && cpu->running) {
276 if (!cpu->can_do_io) {
277 error_report("Bad icount read");
278 exit(1);
280 /* Take into account what has run */
281 cpu_update_icount_locked(cpu);
283 /* The read is protected by the seqlock, so __nocheck is okay. */
284 return atomic_read__nocheck(&timers_state.qemu_icount);
287 static int64_t cpu_get_icount_locked(void)
289 int64_t icount = cpu_get_icount_raw_locked();
290 return atomic_read__nocheck(&timers_state.qemu_icount_bias) + cpu_icount_to_ns(icount);
293 int64_t cpu_get_icount_raw(void)
295 int64_t icount;
296 unsigned start;
298 do {
299 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
300 icount = cpu_get_icount_raw_locked();
301 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
303 return icount;
306 /* Return the virtual CPU time, based on the instruction counter. */
307 int64_t cpu_get_icount(void)
309 int64_t icount;
310 unsigned start;
312 do {
313 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
314 icount = cpu_get_icount_locked();
315 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
317 return icount;
320 int64_t cpu_icount_to_ns(int64_t icount)
322 return icount << atomic_read(&timers_state.icount_time_shift);
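/* For example, with icount_time_shift == 3 each instruction accounts for
 * 1 << 3 = 8 ns of virtual time, i.e. a nominal 125 MIPS guest (the
 * default initial guess used by configure_icount() below).
 */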
325 static int64_t cpu_get_ticks_locked(void)
327 int64_t ticks = timers_state.cpu_ticks_offset;
328 if (timers_state.cpu_ticks_enabled) {
329 ticks += cpu_get_host_ticks();
332 if (timers_state.cpu_ticks_prev > ticks) {
333 /* Non-increasing ticks may happen if the host uses software suspend. */
334 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
335 ticks = timers_state.cpu_ticks_prev;
338 timers_state.cpu_ticks_prev = ticks;
339 return ticks;
342 /* return the time elapsed in VM between vm_start and vm_stop. Unless
343 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
344 * counter.
346 int64_t cpu_get_ticks(void)
348 int64_t ticks;
350 if (use_icount) {
351 return cpu_get_icount();
354 qemu_spin_lock(&timers_state.vm_clock_lock);
355 ticks = cpu_get_ticks_locked();
356 qemu_spin_unlock(&timers_state.vm_clock_lock);
357 return ticks;
360 static int64_t cpu_get_clock_locked(void)
362 int64_t time;
364 time = timers_state.cpu_clock_offset;
365 if (timers_state.cpu_ticks_enabled) {
366 time += get_clock();
369 return time;
372 /* Return the monotonic time elapsed in VM, i.e.,
373 * the time between vm_start and vm_stop
375 int64_t cpu_get_clock(void)
377 int64_t ti;
378 unsigned start;
380 do {
381 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
382 ti = cpu_get_clock_locked();
383 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
385 return ti;
388 /* enable cpu_get_ticks()
389 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
391 void cpu_enable_ticks(void)
393 seqlock_write_lock(&timers_state.vm_clock_seqlock,
394 &timers_state.vm_clock_lock);
395 if (!timers_state.cpu_ticks_enabled) {
396 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
397 timers_state.cpu_clock_offset -= get_clock();
398 timers_state.cpu_ticks_enabled = 1;
400 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
401 &timers_state.vm_clock_lock);
404 /* disable cpu_get_ticks() : the clock is stopped. You must not call
405 * cpu_get_ticks() after that.
406 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
408 void cpu_disable_ticks(void)
410 seqlock_write_lock(&timers_state.vm_clock_seqlock,
411 &timers_state.vm_clock_lock);
412 if (timers_state.cpu_ticks_enabled) {
413 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
414 timers_state.cpu_clock_offset = cpu_get_clock_locked();
415 timers_state.cpu_ticks_enabled = 0;
417 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
418 &timers_state.vm_clock_lock);
421 /* Correlation between real and virtual time is always going to be
422 fairly approximate, so ignore small variation.
423 When the guest is idle real and virtual time will be aligned in
424 the IO wait loop. */
425 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
427 static void icount_adjust(void)
429 int64_t cur_time;
430 int64_t cur_icount;
431 int64_t delta;
433 /* Protected by TimersState mutex. */
434 static int64_t last_delta;
436 /* If the VM is not running, then do nothing. */
437 if (!runstate_is_running()) {
438 return;
441 seqlock_write_lock(&timers_state.vm_clock_seqlock,
442 &timers_state.vm_clock_lock);
443 cur_time = cpu_get_clock_locked();
444 cur_icount = cpu_get_icount_locked();
446 delta = cur_icount - cur_time;
447 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
448 if (delta > 0
449 && last_delta + ICOUNT_WOBBLE < delta * 2
450 && timers_state.icount_time_shift > 0) {
451 /* The guest is getting too far ahead. Slow time down. */
452 atomic_set(&timers_state.icount_time_shift,
453 timers_state.icount_time_shift - 1);
455 if (delta < 0
456 && last_delta - ICOUNT_WOBBLE > delta * 2
457 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
458 /* The guest is getting too far behind. Speed time up. */
459 atomic_set(&timers_state.icount_time_shift,
460 timers_state.icount_time_shift + 1);
462 last_delta = delta;
463 atomic_set__nocheck(&timers_state.qemu_icount_bias,
464 cur_icount - (timers_state.qemu_icount
465 << timers_state.icount_time_shift));
466 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
467 &timers_state.vm_clock_lock);
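/* Note that the bias is recomputed so that
 * qemu_icount_bias + (qemu_icount << icount_time_shift) still equals
 * cur_icount: changing the shift only alters the future rate, and
 * cpu_get_icount_locked() stays continuous across the adjustment.
 */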
470 static void icount_adjust_rt(void *opaque)
472 timer_mod(timers_state.icount_rt_timer,
473 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
474 icount_adjust();
477 static void icount_adjust_vm(void *opaque)
479 timer_mod(timers_state.icount_vm_timer,
480 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
481 NANOSECONDS_PER_SECOND / 10);
482 icount_adjust();
485 static int64_t qemu_icount_round(int64_t count)
487 int shift = atomic_read(&timers_state.icount_time_shift);
488 return (count + (1 << shift) - 1) >> shift;
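/* e.g. a 9 ns deadline with shift == 3 becomes (9 + 7) >> 3 = 2
 * instructions (worth 16 ns), so the rounded budget never falls short of
 * the requested deadline.
 */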
491 static void icount_warp_rt(void)
493 unsigned seq;
494 int64_t warp_start;
496 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
497 * changes from -1 to another value, so the race here is okay.
499 do {
500 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
501 warp_start = timers_state.vm_clock_warp_start;
502 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
504 if (warp_start == -1) {
505 return;
508 seqlock_write_lock(&timers_state.vm_clock_seqlock,
509 &timers_state.vm_clock_lock);
510 if (runstate_is_running()) {
511 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
512 cpu_get_clock_locked());
513 int64_t warp_delta;
515 warp_delta = clock - timers_state.vm_clock_warp_start;
516 if (use_icount == 2) {
518 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
519 * far ahead of real time.
521 int64_t cur_icount = cpu_get_icount_locked();
522 int64_t delta = clock - cur_icount;
523 warp_delta = MIN(warp_delta, delta);
525 atomic_set__nocheck(&timers_state.qemu_icount_bias,
526 timers_state.qemu_icount_bias + warp_delta);
528 timers_state.vm_clock_warp_start = -1;
529 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
530 &timers_state.vm_clock_lock);
532 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
533 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
537 static void icount_timer_cb(void *opaque)
539 /* No need for a checkpoint because the timer already synchronizes
540 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
542 icount_warp_rt();
545 void qtest_clock_warp(int64_t dest)
547 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
548 AioContext *aio_context;
549 assert(qtest_enabled());
550 aio_context = qemu_get_aio_context();
551 while (clock < dest) {
552 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
553 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
555 seqlock_write_lock(&timers_state.vm_clock_seqlock,
556 &timers_state.vm_clock_lock);
557 atomic_set__nocheck(&timers_state.qemu_icount_bias,
558 timers_state.qemu_icount_bias + warp);
559 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
560 &timers_state.vm_clock_lock);
562 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
563 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
564 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
566 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
569 void qemu_start_warp_timer(void)
571 int64_t clock;
572 int64_t deadline;
574 if (!use_icount) {
575 return;
578 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
579 * do not fire, so computing the deadline does not make sense.
581 if (!runstate_is_running()) {
582 return;
585 /* warp clock deterministically in record/replay mode */
586 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
587 return;
590 if (!all_cpu_threads_idle()) {
591 return;
594 if (qtest_enabled()) {
595 /* When testing, qtest commands advance icount. */
596 return;
599 /* We want to use the earliest deadline from ALL vm_clocks */
600 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
601 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
602 if (deadline < 0) {
603 static bool notified;
604 if (!icount_sleep && !notified) {
605 warn_report("icount sleep disabled and no active timers");
606 notified = true;
608 return;
611 if (deadline > 0) {
613 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
614 * sleep. Otherwise, the CPU might be waiting for a future timer
615 * interrupt to wake it up, but the interrupt never comes because
616 * the vCPU isn't running any insns and thus doesn't advance the
617 * QEMU_CLOCK_VIRTUAL.
619 if (!icount_sleep) {
621 * We never let VCPUs sleep in no sleep icount mode.
622 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
623 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
624 * It is useful when we want a deterministic execution time,
625 * isolated from host latencies.
627 seqlock_write_lock(&timers_state.vm_clock_seqlock,
628 &timers_state.vm_clock_lock);
629 atomic_set__nocheck(&timers_state.qemu_icount_bias,
630 timers_state.qemu_icount_bias + deadline);
631 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
632 &timers_state.vm_clock_lock);
633 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
634 } else {
636 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
637 * "real" time, (related to the time left until the next event) has
638 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
639 * This avoids that the warps are visible externally; for example,
640 * you will not be sending network packets continuously instead of
641 * every 100ms.
643 seqlock_write_lock(&timers_state.vm_clock_seqlock,
644 &timers_state.vm_clock_lock);
645 if (timers_state.vm_clock_warp_start == -1
646 || timers_state.vm_clock_warp_start > clock) {
647 timers_state.vm_clock_warp_start = clock;
649 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
650 &timers_state.vm_clock_lock);
651 timer_mod_anticipate(timers_state.icount_warp_timer,
652 clock + deadline);
654 } else if (deadline == 0) {
655 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
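/* Summary of the cases above: with sleep=off the bias is bumped by the
 * whole deadline right away; with sleep=on the warp timer is armed at
 * clock + deadline and icount_warp_rt() later credits only the real time
 * that actually elapsed; a zero deadline means a timer is already due,
 * so it is enough to notify QEMU_CLOCK_VIRTUAL.
 */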
659 static void qemu_account_warp_timer(void)
661 if (!use_icount || !icount_sleep) {
662 return;
665 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
666 * do not fire, so computing the deadline does not make sense.
668 if (!runstate_is_running()) {
669 return;
672 /* warp clock deterministically in record/replay mode */
673 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
674 return;
677 timer_del(timers_state.icount_warp_timer);
678 icount_warp_rt();
681 static bool icount_state_needed(void *opaque)
683 return use_icount;
686 static bool warp_timer_state_needed(void *opaque)
688 TimersState *s = opaque;
689 return s->icount_warp_timer != NULL;
692 static bool adjust_timers_state_needed(void *opaque)
694 TimersState *s = opaque;
695 return s->icount_rt_timer != NULL;
699 * Subsection for warp timer migration is optional, because it may not be created
701 static const VMStateDescription icount_vmstate_warp_timer = {
702 .name = "timer/icount/warp_timer",
703 .version_id = 1,
704 .minimum_version_id = 1,
705 .needed = warp_timer_state_needed,
706 .fields = (VMStateField[]) {
707 VMSTATE_INT64(vm_clock_warp_start, TimersState),
708 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
709 VMSTATE_END_OF_LIST()
713 static const VMStateDescription icount_vmstate_adjust_timers = {
714 .name = "timer/icount/timers",
715 .version_id = 1,
716 .minimum_version_id = 1,
717 .needed = adjust_timers_state_needed,
718 .fields = (VMStateField[]) {
719 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
720 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
721 VMSTATE_END_OF_LIST()
726 * This is a subsection for icount migration.
728 static const VMStateDescription icount_vmstate_timers = {
729 .name = "timer/icount",
730 .version_id = 1,
731 .minimum_version_id = 1,
732 .needed = icount_state_needed,
733 .fields = (VMStateField[]) {
734 VMSTATE_INT64(qemu_icount_bias, TimersState),
735 VMSTATE_INT64(qemu_icount, TimersState),
736 VMSTATE_END_OF_LIST()
738 .subsections = (const VMStateDescription*[]) {
739 &icount_vmstate_warp_timer,
740 &icount_vmstate_adjust_timers,
741 NULL
745 static const VMStateDescription vmstate_timers = {
746 .name = "timer",
747 .version_id = 2,
748 .minimum_version_id = 1,
749 .fields = (VMStateField[]) {
750 VMSTATE_INT64(cpu_ticks_offset, TimersState),
751 VMSTATE_UNUSED(8),
752 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
753 VMSTATE_END_OF_LIST()
755 .subsections = (const VMStateDescription*[]) {
756 &icount_vmstate_timers,
757 NULL
761 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
763 double pct;
764 double throttle_ratio;
765 long sleeptime_ns;
767 if (!cpu_throttle_get_percentage()) {
768 return;
771 pct = (double)cpu_throttle_get_percentage()/100;
772 throttle_ratio = pct / (1 - pct);
773 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
775 qemu_mutex_unlock_iothread();
776 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
777 qemu_mutex_lock_iothread();
778 atomic_set(&cpu->throttle_thread_scheduled, 0);
781 static void cpu_throttle_timer_tick(void *opaque)
783 CPUState *cpu;
784 double pct;
786 /* Stop the timer if needed */
787 if (!cpu_throttle_get_percentage()) {
788 return;
790 CPU_FOREACH(cpu) {
791 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
792 async_run_on_cpu(cpu, cpu_throttle_thread,
793 RUN_ON_CPU_NULL);
797 pct = (double)cpu_throttle_get_percentage()/100;
798 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
799 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
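/* Worked example: at 50% throttle pct = 0.5, so cpu_throttle_thread()
 * above sleeps for throttle_ratio * 10 ms = 10 ms and the timer re-arms
 * 10 ms / (1 - 0.5) = 20 ms out, i.e. each vCPU spends roughly half of
 * every 20 ms period asleep.
 */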
802 void cpu_throttle_set(int new_throttle_pct)
804 /* Ensure throttle percentage is within valid range */
805 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
806 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
808 atomic_set(&throttle_percentage, new_throttle_pct);
810 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
811 CPU_THROTTLE_TIMESLICE_NS);
814 void cpu_throttle_stop(void)
816 atomic_set(&throttle_percentage, 0);
819 bool cpu_throttle_active(void)
821 return (cpu_throttle_get_percentage() != 0);
824 int cpu_throttle_get_percentage(void)
826 return atomic_read(&throttle_percentage);
829 void cpu_ticks_init(void)
831 seqlock_init(&timers_state.vm_clock_seqlock);
832 qemu_spin_init(&timers_state.vm_clock_lock);
833 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
834 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
835 cpu_throttle_timer_tick, NULL);
838 void configure_icount(QemuOpts *opts, Error **errp)
840 const char *option;
841 char *rem_str = NULL;
843 option = qemu_opt_get(opts, "shift");
844 if (!option) {
845 if (qemu_opt_get(opts, "align") != NULL) {
846 error_setg(errp, "Please specify shift option when using align");
848 return;
851 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
852 if (icount_sleep) {
853 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
854 icount_timer_cb, NULL);
857 icount_align_option = qemu_opt_get_bool(opts, "align", false);
859 if (icount_align_option && !icount_sleep) {
860 error_setg(errp, "align=on and sleep=off are incompatible");
862 if (strcmp(option, "auto") != 0) {
863 errno = 0;
864 timers_state.icount_time_shift = strtol(option, &rem_str, 0);
865 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
866 error_setg(errp, "icount: Invalid shift value");
868 use_icount = 1;
869 return;
870 } else if (icount_align_option) {
871 error_setg(errp, "shift=auto and align=on are incompatible");
872 } else if (!icount_sleep) {
873 error_setg(errp, "shift=auto and sleep=off are incompatible");
876 use_icount = 2;
878 /* 125MIPS seems a reasonable initial guess at the guest speed.
879 It will be corrected fairly quickly anyway. */
880 timers_state.icount_time_shift = 3;
882 /* Have both realtime and virtual time triggers for speed adjustment.
883 The realtime trigger catches emulated time passing too slowly,
884 the virtual time trigger catches emulated time passing too fast.
885 Realtime triggers occur even when idle, so use them less frequently
886 than VM triggers. */
887 timers_state.vm_clock_warp_start = -1;
888 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
889 icount_adjust_rt, NULL);
890 timer_mod(timers_state.icount_rt_timer,
891 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
892 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
893 icount_adjust_vm, NULL);
894 timer_mod(timers_state.icount_vm_timer,
895 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
896 NANOSECONDS_PER_SECOND / 10);
899 /***********************************************************/
900 /* TCG vCPU kick timer
902 * The kick timer is responsible for moving single threaded vCPU
903 * emulation on to the next vCPU. If more than one vCPU is running a
904 * timer event will force a cpu->exit so the next vCPU can get
905 * scheduled.
907 * The timer is removed if all vCPUs are idle and restarted again once
908 * the vCPUs stop being idle.
911 static QEMUTimer *tcg_kick_vcpu_timer;
912 static CPUState *tcg_current_rr_cpu;
914 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
916 static inline int64_t qemu_tcg_next_kick(void)
918 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
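/* TCG_KICK_PERIOD is NANOSECONDS_PER_SECOND / 10 = 100 ms, so in
 * round-robin mode a vCPU is kicked off the shared thread roughly every
 * 100 ms at most when more than one vCPU exists.
 */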
921 /* Kick the currently round-robin scheduled vCPU */
922 static void qemu_cpu_kick_rr_cpu(void)
924 CPUState *cpu;
925 do {
926 cpu = atomic_mb_read(&tcg_current_rr_cpu);
927 if (cpu) {
928 cpu_exit(cpu);
930 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
933 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
937 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
939 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
940 qemu_notify_event();
941 return;
944 if (qemu_in_vcpu_thread()) {
945 /* A CPU is currently running; kick it back out to the
946 * tcg_cpu_exec() loop so it will recalculate its
947 * icount deadline immediately.
949 qemu_cpu_kick(current_cpu);
950 } else if (first_cpu) {
951 /* qemu_cpu_kick is not enough to kick a halted CPU out of
952 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
953 * causes cpu_thread_is_idle to return false. This way,
954 * handle_icount_deadline can run.
955 * If we have no CPUs at all for some reason, we don't
956 * need to do anything.
958 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
962 static void kick_tcg_thread(void *opaque)
964 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
965 qemu_cpu_kick_rr_cpu();
968 static void start_tcg_kick_timer(void)
970 assert(!mttcg_enabled);
971 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
972 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
973 kick_tcg_thread, NULL);
974 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
978 static void stop_tcg_kick_timer(void)
980 assert(!mttcg_enabled);
981 if (tcg_kick_vcpu_timer) {
982 timer_del(tcg_kick_vcpu_timer);
983 tcg_kick_vcpu_timer = NULL;
987 /***********************************************************/
988 void hw_error(const char *fmt, ...)
990 va_list ap;
991 CPUState *cpu;
993 va_start(ap, fmt);
994 fprintf(stderr, "qemu: hardware error: ");
995 vfprintf(stderr, fmt, ap);
996 fprintf(stderr, "\n");
997 CPU_FOREACH(cpu) {
998 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
999 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
1001 va_end(ap);
1002 abort();
1005 void cpu_synchronize_all_states(void)
1007 CPUState *cpu;
1009 CPU_FOREACH(cpu) {
1010 cpu_synchronize_state(cpu);
1011 /* TODO: move to cpu_synchronize_state() */
1012 if (hvf_enabled()) {
1013 hvf_cpu_synchronize_state(cpu);
1018 void cpu_synchronize_all_post_reset(void)
1020 CPUState *cpu;
1022 CPU_FOREACH(cpu) {
1023 cpu_synchronize_post_reset(cpu);
1024 /* TODO: move to cpu_synchronize_post_reset() */
1025 if (hvf_enabled()) {
1026 hvf_cpu_synchronize_post_reset(cpu);
1031 void cpu_synchronize_all_post_init(void)
1033 CPUState *cpu;
1035 CPU_FOREACH(cpu) {
1036 cpu_synchronize_post_init(cpu);
1037 /* TODO: move to cpu_synchronize_post_init() */
1038 if (hvf_enabled()) {
1039 hvf_cpu_synchronize_post_init(cpu);
1044 void cpu_synchronize_all_pre_loadvm(void)
1046 CPUState *cpu;
1048 CPU_FOREACH(cpu) {
1049 cpu_synchronize_pre_loadvm(cpu);
1053 static int do_vm_stop(RunState state, bool send_stop)
1055 int ret = 0;
1057 if (runstate_is_running()) {
1058 cpu_disable_ticks();
1059 pause_all_vcpus();
1060 runstate_set(state);
1061 vm_state_notify(0, state);
1062 if (send_stop) {
1063 qapi_event_send_stop();
1067 bdrv_drain_all();
1068 replay_disable_events();
1069 ret = bdrv_flush_all();
1071 return ret;
1074 /* Special vm_stop() variant for terminating the process. Historically clients
1075 * did not expect a QMP STOP event and so we need to retain compatibility.
1077 int vm_shutdown(void)
1079 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
1082 static bool cpu_can_run(CPUState *cpu)
1084 if (cpu->stop) {
1085 return false;
1087 if (cpu_is_stopped(cpu)) {
1088 return false;
1090 return true;
1093 static void cpu_handle_guest_debug(CPUState *cpu)
1095 gdb_set_stop_cpu(cpu);
1096 qemu_system_debug_request();
1097 cpu->stopped = true;
1100 #ifdef CONFIG_LINUX
1101 static void sigbus_reraise(void)
1103 sigset_t set;
1104 struct sigaction action;
1106 memset(&action, 0, sizeof(action));
1107 action.sa_handler = SIG_DFL;
1108 if (!sigaction(SIGBUS, &action, NULL)) {
1109 raise(SIGBUS);
1110 sigemptyset(&set);
1111 sigaddset(&set, SIGBUS);
1112 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1114 perror("Failed to re-raise SIGBUS!\n");
1115 abort();
1118 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1120 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1121 sigbus_reraise();
1124 if (current_cpu) {
1125 /* Called asynchronously in VCPU thread. */
1126 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1127 sigbus_reraise();
1129 } else {
1130 /* Called synchronously (via signalfd) in main thread. */
1131 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1132 sigbus_reraise();
1137 static void qemu_init_sigbus(void)
1139 struct sigaction action;
1141 memset(&action, 0, sizeof(action));
1142 action.sa_flags = SA_SIGINFO;
1143 action.sa_sigaction = sigbus_handler;
1144 sigaction(SIGBUS, &action, NULL);
1146 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1148 #else /* !CONFIG_LINUX */
1149 static void qemu_init_sigbus(void)
1152 #endif /* !CONFIG_LINUX */
1154 static QemuMutex qemu_global_mutex;
1156 static QemuThread io_thread;
1158 /* cpu creation */
1159 static QemuCond qemu_cpu_cond;
1160 /* system init */
1161 static QemuCond qemu_pause_cond;
1163 void qemu_init_cpu_loop(void)
1165 qemu_init_sigbus();
1166 qemu_cond_init(&qemu_cpu_cond);
1167 qemu_cond_init(&qemu_pause_cond);
1168 qemu_mutex_init(&qemu_global_mutex);
1170 qemu_thread_get_self(&io_thread);
1173 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1175 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1178 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1180 if (kvm_destroy_vcpu(cpu) < 0) {
1181 error_report("kvm_destroy_vcpu failed");
1182 exit(EXIT_FAILURE);
1186 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1190 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1192 g_assert(qemu_cpu_is_self(cpu));
1193 cpu->stop = false;
1194 cpu->stopped = true;
1195 if (exit) {
1196 cpu_exit(cpu);
1198 qemu_cond_broadcast(&qemu_pause_cond);
1201 static void qemu_wait_io_event_common(CPUState *cpu)
1203 atomic_mb_set(&cpu->thread_kicked, false);
1204 if (cpu->stop) {
1205 qemu_cpu_stop(cpu, false);
1207 process_queued_cpu_work(cpu);
1210 static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
1212 while (all_cpu_threads_idle()) {
1213 stop_tcg_kick_timer();
1214 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1217 start_tcg_kick_timer();
1219 qemu_wait_io_event_common(cpu);
1222 static void qemu_wait_io_event(CPUState *cpu)
1224 while (cpu_thread_is_idle(cpu)) {
1225 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1228 #ifdef _WIN32
1229 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1230 if (!tcg_enabled()) {
1231 SleepEx(0, TRUE);
1233 #endif
1234 qemu_wait_io_event_common(cpu);
1237 static void *qemu_kvm_cpu_thread_fn(void *arg)
1239 CPUState *cpu = arg;
1240 int r;
1242 rcu_register_thread();
1244 qemu_mutex_lock_iothread();
1245 qemu_thread_get_self(cpu->thread);
1246 cpu->thread_id = qemu_get_thread_id();
1247 cpu->can_do_io = 1;
1248 current_cpu = cpu;
1250 r = kvm_init_vcpu(cpu);
1251 if (r < 0) {
1252 error_report("kvm_init_vcpu failed: %s", strerror(-r));
1253 exit(1);
1256 kvm_init_cpu_signals(cpu);
1258 /* signal CPU creation */
1259 cpu->created = true;
1260 qemu_cond_signal(&qemu_cpu_cond);
1262 do {
1263 if (cpu_can_run(cpu)) {
1264 r = kvm_cpu_exec(cpu);
1265 if (r == EXCP_DEBUG) {
1266 cpu_handle_guest_debug(cpu);
1269 qemu_wait_io_event(cpu);
1270 } while (!cpu->unplug || cpu_can_run(cpu));
1272 qemu_kvm_destroy_vcpu(cpu);
1273 cpu->created = false;
1274 qemu_cond_signal(&qemu_cpu_cond);
1275 qemu_mutex_unlock_iothread();
1276 rcu_unregister_thread();
1277 return NULL;
1280 static void *qemu_dummy_cpu_thread_fn(void *arg)
1282 #ifdef _WIN32
1283 error_report("qtest is not supported under Windows");
1284 exit(1);
1285 #else
1286 CPUState *cpu = arg;
1287 sigset_t waitset;
1288 int r;
1290 rcu_register_thread();
1292 qemu_mutex_lock_iothread();
1293 qemu_thread_get_self(cpu->thread);
1294 cpu->thread_id = qemu_get_thread_id();
1295 cpu->can_do_io = 1;
1296 current_cpu = cpu;
1298 sigemptyset(&waitset);
1299 sigaddset(&waitset, SIG_IPI);
1301 /* signal CPU creation */
1302 cpu->created = true;
1303 qemu_cond_signal(&qemu_cpu_cond);
1305 do {
1306 qemu_mutex_unlock_iothread();
1307 do {
1308 int sig;
1309 r = sigwait(&waitset, &sig);
1310 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1311 if (r == -1) {
1312 perror("sigwait");
1313 exit(1);
1315 qemu_mutex_lock_iothread();
1316 qemu_wait_io_event(cpu);
1317 } while (!cpu->unplug);
1319 rcu_unregister_thread();
1320 return NULL;
1321 #endif
1324 static int64_t tcg_get_icount_limit(void)
1326 int64_t deadline;
1328 if (replay_mode != REPLAY_MODE_PLAY) {
1329 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1331 /* Maintain prior (possibly buggy) behaviour where if no deadline
1332 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1333 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1334 * nanoseconds.
1336 if ((deadline < 0) || (deadline > INT32_MAX)) {
1337 deadline = INT32_MAX;
1340 return qemu_icount_round(deadline);
1341 } else {
1342 return replay_get_instructions();
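/* So during record/replay playback the budget comes from the recorded
 * instruction stream, while otherwise it is the time until the next
 * QEMU_CLOCK_VIRTUAL timer (capped at INT32_MAX ns) converted into an
 * instruction count by qemu_icount_round().
 */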
1346 static void handle_icount_deadline(void)
1348 assert(qemu_in_vcpu_thread());
1349 if (use_icount) {
1350 int64_t deadline =
1351 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1353 if (deadline == 0) {
1354 /* Wake up other AioContexts. */
1355 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1356 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1361 static void prepare_icount_for_run(CPUState *cpu)
1363 if (use_icount) {
1364 int insns_left;
1366 /* These should always be cleared by process_icount_data after
1367 * each vCPU execution. However u16.high can be raised
1368 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1370 g_assert(cpu->icount_decr.u16.low == 0);
1371 g_assert(cpu->icount_extra == 0);
1373 cpu->icount_budget = tcg_get_icount_limit();
1374 insns_left = MIN(0xffff, cpu->icount_budget);
1375 cpu->icount_decr.u16.low = insns_left;
1376 cpu->icount_extra = cpu->icount_budget - insns_left;
1378 replay_mutex_lock();
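/* Hypothetical split: a budget of 100000 instructions becomes
 * u16.low = 0xffff (65535) and icount_extra = 34465; the 16-bit
 * decrementer is what the generated code counts down, icount_extra
 * holds the remainder.
 */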
1382 static void process_icount_data(CPUState *cpu)
1384 if (use_icount) {
1385 /* Account for executed instructions */
1386 cpu_update_icount(cpu);
1388 /* Reset the counters */
1389 cpu->icount_decr.u16.low = 0;
1390 cpu->icount_extra = 0;
1391 cpu->icount_budget = 0;
1393 replay_account_executed_instructions();
1395 replay_mutex_unlock();
1400 static int tcg_cpu_exec(CPUState *cpu)
1402 int ret;
1403 #ifdef CONFIG_PROFILER
1404 int64_t ti;
1405 #endif
1407 assert(tcg_enabled());
1408 #ifdef CONFIG_PROFILER
1409 ti = profile_getclock();
1410 #endif
1411 cpu_exec_start(cpu);
1412 ret = cpu_exec(cpu);
1413 cpu_exec_end(cpu);
1414 #ifdef CONFIG_PROFILER
1415 tcg_time += profile_getclock() - ti;
1416 #endif
1417 return ret;
1420 /* Destroy any remaining vCPUs which have been unplugged and have
1421 * finished running
1423 static void deal_with_unplugged_cpus(void)
1425 CPUState *cpu;
1427 CPU_FOREACH(cpu) {
1428 if (cpu->unplug && !cpu_can_run(cpu)) {
1429 qemu_tcg_destroy_vcpu(cpu);
1430 cpu->created = false;
1431 qemu_cond_signal(&qemu_cpu_cond);
1432 break;
1437 /* Single-threaded TCG
1439 * In the single-threaded case each vCPU is simulated in turn. If
1440 * there is more than a single vCPU we create a simple timer to kick
1441 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1442 * This is done explicitly rather than relying on side-effects
1443 * elsewhere.
1446 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1448 CPUState *cpu = arg;
1450 assert(tcg_enabled());
1451 rcu_register_thread();
1452 tcg_register_thread();
1454 qemu_mutex_lock_iothread();
1455 qemu_thread_get_self(cpu->thread);
1457 cpu->thread_id = qemu_get_thread_id();
1458 cpu->created = true;
1459 cpu->can_do_io = 1;
1460 qemu_cond_signal(&qemu_cpu_cond);
1462 /* wait for initial kick-off after machine start */
1463 while (first_cpu->stopped) {
1464 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1466 /* process any pending work */
1467 CPU_FOREACH(cpu) {
1468 current_cpu = cpu;
1469 qemu_wait_io_event_common(cpu);
1473 start_tcg_kick_timer();
1475 cpu = first_cpu;
1477 /* process any pending work */
1478 cpu->exit_request = 1;
1480 while (1) {
1481 qemu_mutex_unlock_iothread();
1482 replay_mutex_lock();
1483 qemu_mutex_lock_iothread();
1484 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1485 qemu_account_warp_timer();
1487 /* Run the timers here. This is much more efficient than
1488 * waking up the I/O thread and waiting for completion.
1490 handle_icount_deadline();
1492 replay_mutex_unlock();
1494 if (!cpu) {
1495 cpu = first_cpu;
1498 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1500 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1501 current_cpu = cpu;
1503 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1504 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1506 if (cpu_can_run(cpu)) {
1507 int r;
1509 qemu_mutex_unlock_iothread();
1510 prepare_icount_for_run(cpu);
1512 r = tcg_cpu_exec(cpu);
1514 process_icount_data(cpu);
1515 qemu_mutex_lock_iothread();
1517 if (r == EXCP_DEBUG) {
1518 cpu_handle_guest_debug(cpu);
1519 break;
1520 } else if (r == EXCP_ATOMIC) {
1521 qemu_mutex_unlock_iothread();
1522 cpu_exec_step_atomic(cpu);
1523 qemu_mutex_lock_iothread();
1524 break;
1526 } else if (cpu->stop) {
1527 if (cpu->unplug) {
1528 cpu = CPU_NEXT(cpu);
1530 break;
1533 cpu = CPU_NEXT(cpu);
1534 } /* while (cpu && !cpu->exit_request).. */
1536 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1537 atomic_set(&tcg_current_rr_cpu, NULL);
1539 if (cpu && cpu->exit_request) {
1540 atomic_mb_set(&cpu->exit_request, 0);
1543 qemu_tcg_rr_wait_io_event(cpu ? cpu : first_cpu);
1544 deal_with_unplugged_cpus();
1547 rcu_unregister_thread();
1548 return NULL;
1551 static void *qemu_hax_cpu_thread_fn(void *arg)
1553 CPUState *cpu = arg;
1554 int r;
1556 rcu_register_thread();
1557 qemu_mutex_lock_iothread();
1558 qemu_thread_get_self(cpu->thread);
1560 cpu->thread_id = qemu_get_thread_id();
1561 cpu->created = true;
1562 cpu->halted = 0;
1563 current_cpu = cpu;
1565 hax_init_vcpu(cpu);
1566 qemu_cond_signal(&qemu_cpu_cond);
1568 do {
1569 if (cpu_can_run(cpu)) {
1570 r = hax_smp_cpu_exec(cpu);
1571 if (r == EXCP_DEBUG) {
1572 cpu_handle_guest_debug(cpu);
1576 qemu_wait_io_event(cpu);
1577 } while (!cpu->unplug || cpu_can_run(cpu));
1578 rcu_unregister_thread();
1579 return NULL;
1582 /* The HVF-specific vCPU thread function. This one should only run when the host
1583 * CPU supports the VMX "unrestricted guest" feature. */
1584 static void *qemu_hvf_cpu_thread_fn(void *arg)
1586 CPUState *cpu = arg;
1588 int r;
1590 assert(hvf_enabled());
1592 rcu_register_thread();
1594 qemu_mutex_lock_iothread();
1595 qemu_thread_get_self(cpu->thread);
1597 cpu->thread_id = qemu_get_thread_id();
1598 cpu->can_do_io = 1;
1599 current_cpu = cpu;
1601 hvf_init_vcpu(cpu);
1603 /* signal CPU creation */
1604 cpu->created = true;
1605 qemu_cond_signal(&qemu_cpu_cond);
1607 do {
1608 if (cpu_can_run(cpu)) {
1609 r = hvf_vcpu_exec(cpu);
1610 if (r == EXCP_DEBUG) {
1611 cpu_handle_guest_debug(cpu);
1614 qemu_wait_io_event(cpu);
1615 } while (!cpu->unplug || cpu_can_run(cpu));
1617 hvf_vcpu_destroy(cpu);
1618 cpu->created = false;
1619 qemu_cond_signal(&qemu_cpu_cond);
1620 qemu_mutex_unlock_iothread();
1621 rcu_unregister_thread();
1622 return NULL;
1625 static void *qemu_whpx_cpu_thread_fn(void *arg)
1627 CPUState *cpu = arg;
1628 int r;
1630 rcu_register_thread();
1632 qemu_mutex_lock_iothread();
1633 qemu_thread_get_self(cpu->thread);
1634 cpu->thread_id = qemu_get_thread_id();
1635 current_cpu = cpu;
1637 r = whpx_init_vcpu(cpu);
1638 if (r < 0) {
1639 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1640 exit(1);
1643 /* signal CPU creation */
1644 cpu->created = true;
1645 qemu_cond_signal(&qemu_cpu_cond);
1647 do {
1648 if (cpu_can_run(cpu)) {
1649 r = whpx_vcpu_exec(cpu);
1650 if (r == EXCP_DEBUG) {
1651 cpu_handle_guest_debug(cpu);
1654 while (cpu_thread_is_idle(cpu)) {
1655 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1657 qemu_wait_io_event_common(cpu);
1658 } while (!cpu->unplug || cpu_can_run(cpu));
1660 whpx_destroy_vcpu(cpu);
1661 cpu->created = false;
1662 qemu_cond_signal(&qemu_cpu_cond);
1663 qemu_mutex_unlock_iothread();
1664 rcu_unregister_thread();
1665 return NULL;
1668 #ifdef _WIN32
1669 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1672 #endif
1674 /* Multi-threaded TCG
1676 * In the multi-threaded case each vCPU has its own thread. The TLS
1677 * variable current_cpu can be used deep in the code to find the
1678 * current CPUState for a given thread.
1681 static void *qemu_tcg_cpu_thread_fn(void *arg)
1683 CPUState *cpu = arg;
1685 assert(tcg_enabled());
1686 g_assert(!use_icount);
1688 rcu_register_thread();
1689 tcg_register_thread();
1691 qemu_mutex_lock_iothread();
1692 qemu_thread_get_self(cpu->thread);
1694 cpu->thread_id = qemu_get_thread_id();
1695 cpu->created = true;
1696 cpu->can_do_io = 1;
1697 current_cpu = cpu;
1698 qemu_cond_signal(&qemu_cpu_cond);
1700 /* process any pending work */
1701 cpu->exit_request = 1;
1703 do {
1704 if (cpu_can_run(cpu)) {
1705 int r;
1706 qemu_mutex_unlock_iothread();
1707 r = tcg_cpu_exec(cpu);
1708 qemu_mutex_lock_iothread();
1709 switch (r) {
1710 case EXCP_DEBUG:
1711 cpu_handle_guest_debug(cpu);
1712 break;
1713 case EXCP_HALTED:
1714 /* during start-up the vCPU is reset and the thread is
1715 * kicked several times. If we don't ensure we go back
1716 * to sleep in the halted state we won't cleanly
1717 * start up when the vCPU is enabled.
1719 * cpu->halted should ensure we sleep in wait_io_event
1721 g_assert(cpu->halted);
1722 break;
1723 case EXCP_ATOMIC:
1724 qemu_mutex_unlock_iothread();
1725 cpu_exec_step_atomic(cpu);
1726 qemu_mutex_lock_iothread();
1727 default:
1728 /* Ignore everything else? */
1729 break;
1733 atomic_mb_set(&cpu->exit_request, 0);
1734 qemu_wait_io_event(cpu);
1735 } while (!cpu->unplug || cpu_can_run(cpu));
1737 qemu_tcg_destroy_vcpu(cpu);
1738 cpu->created = false;
1739 qemu_cond_signal(&qemu_cpu_cond);
1740 qemu_mutex_unlock_iothread();
1741 rcu_unregister_thread();
1742 return NULL;
1745 static void qemu_cpu_kick_thread(CPUState *cpu)
1747 #ifndef _WIN32
1748 int err;
1750 if (cpu->thread_kicked) {
1751 return;
1753 cpu->thread_kicked = true;
1754 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1755 if (err) {
1756 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1757 exit(1);
1759 #else /* _WIN32 */
1760 if (!qemu_cpu_is_self(cpu)) {
1761 if (whpx_enabled()) {
1762 whpx_vcpu_kick(cpu);
1763 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1764 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1765 __func__, GetLastError());
1766 exit(1);
1769 #endif
1772 void qemu_cpu_kick(CPUState *cpu)
1774 qemu_cond_broadcast(cpu->halt_cond);
1775 if (tcg_enabled()) {
1776 cpu_exit(cpu);
1777 /* NOP unless doing single-thread RR */
1778 qemu_cpu_kick_rr_cpu();
1779 } else {
1780 if (hax_enabled()) {
1782 * FIXME: race condition with the exit_request check in
1783 * hax_vcpu_hax_exec
1785 cpu->exit_request = 1;
1787 qemu_cpu_kick_thread(cpu);
1791 void qemu_cpu_kick_self(void)
1793 assert(current_cpu);
1794 qemu_cpu_kick_thread(current_cpu);
1797 bool qemu_cpu_is_self(CPUState *cpu)
1799 return qemu_thread_is_self(cpu->thread);
1802 bool qemu_in_vcpu_thread(void)
1804 return current_cpu && qemu_cpu_is_self(current_cpu);
1807 static __thread bool iothread_locked = false;
1809 bool qemu_mutex_iothread_locked(void)
1811 return iothread_locked;
1815 * The BQL is taken from so many places that it is worth profiling the
1816 * callers directly, instead of funneling them all through a single function.
1818 void qemu_mutex_lock_iothread_impl(const char *file, int line)
1820 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1822 g_assert(!qemu_mutex_iothread_locked());
1823 bql_lock(&qemu_global_mutex, file, line);
1824 iothread_locked = true;
1827 void qemu_mutex_unlock_iothread(void)
1829 g_assert(qemu_mutex_iothread_locked());
1830 iothread_locked = false;
1831 qemu_mutex_unlock(&qemu_global_mutex);
1834 static bool all_vcpus_paused(void)
1836 CPUState *cpu;
1838 CPU_FOREACH(cpu) {
1839 if (!cpu->stopped) {
1840 return false;
1844 return true;
1847 void pause_all_vcpus(void)
1849 CPUState *cpu;
1851 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1852 CPU_FOREACH(cpu) {
1853 if (qemu_cpu_is_self(cpu)) {
1854 qemu_cpu_stop(cpu, true);
1855 } else {
1856 cpu->stop = true;
1857 qemu_cpu_kick(cpu);
1861 /* We need to drop the replay_lock so any vCPU threads woken up
1862 * can finish their replay tasks
1864 replay_mutex_unlock();
1866 while (!all_vcpus_paused()) {
1867 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1868 CPU_FOREACH(cpu) {
1869 qemu_cpu_kick(cpu);
1873 qemu_mutex_unlock_iothread();
1874 replay_mutex_lock();
1875 qemu_mutex_lock_iothread();
1878 void cpu_resume(CPUState *cpu)
1880 cpu->stop = false;
1881 cpu->stopped = false;
1882 qemu_cpu_kick(cpu);
1885 void resume_all_vcpus(void)
1887 CPUState *cpu;
1889 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1890 CPU_FOREACH(cpu) {
1891 cpu_resume(cpu);
1895 void cpu_remove_sync(CPUState *cpu)
1897 cpu->stop = true;
1898 cpu->unplug = true;
1899 qemu_cpu_kick(cpu);
1900 qemu_mutex_unlock_iothread();
1901 qemu_thread_join(cpu->thread);
1902 qemu_mutex_lock_iothread();
1905 /* For temporary buffers for forming a name */
1906 #define VCPU_THREAD_NAME_SIZE 16
1908 static void qemu_tcg_init_vcpu(CPUState *cpu)
1910 char thread_name[VCPU_THREAD_NAME_SIZE];
1911 static QemuCond *single_tcg_halt_cond;
1912 static QemuThread *single_tcg_cpu_thread;
1913 static int tcg_region_inited;
1915 assert(tcg_enabled());
1917 * Initialize TCG regions--once. Now is a good time, because:
1918 * (1) TCG's init context, prologue and target globals have been set up.
1919 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1920 * -accel flag is processed, so the check doesn't work then).
1922 if (!tcg_region_inited) {
1923 tcg_region_inited = 1;
1924 tcg_region_init();
1927 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1928 cpu->thread = g_malloc0(sizeof(QemuThread));
1929 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1930 qemu_cond_init(cpu->halt_cond);
1932 if (qemu_tcg_mttcg_enabled()) {
1933 /* create a thread per vCPU with TCG (MTTCG) */
1934 parallel_cpus = true;
1935 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1936 cpu->cpu_index);
1938 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1939 cpu, QEMU_THREAD_JOINABLE);
1941 } else {
1942 /* share a single thread for all cpus with TCG */
1943 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1944 qemu_thread_create(cpu->thread, thread_name,
1945 qemu_tcg_rr_cpu_thread_fn,
1946 cpu, QEMU_THREAD_JOINABLE);
1948 single_tcg_halt_cond = cpu->halt_cond;
1949 single_tcg_cpu_thread = cpu->thread;
1951 #ifdef _WIN32
1952 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1953 #endif
1954 } else {
1955 /* For non-MTTCG cases we share the thread */
1956 cpu->thread = single_tcg_cpu_thread;
1957 cpu->halt_cond = single_tcg_halt_cond;
1958 cpu->thread_id = first_cpu->thread_id;
1959 cpu->can_do_io = 1;
1960 cpu->created = true;
1964 static void qemu_hax_start_vcpu(CPUState *cpu)
1966 char thread_name[VCPU_THREAD_NAME_SIZE];
1968 cpu->thread = g_malloc0(sizeof(QemuThread));
1969 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1970 qemu_cond_init(cpu->halt_cond);
1972 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1973 cpu->cpu_index);
1974 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1975 cpu, QEMU_THREAD_JOINABLE);
1976 #ifdef _WIN32
1977 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1978 #endif
1981 static void qemu_kvm_start_vcpu(CPUState *cpu)
1983 char thread_name[VCPU_THREAD_NAME_SIZE];
1985 cpu->thread = g_malloc0(sizeof(QemuThread));
1986 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1987 qemu_cond_init(cpu->halt_cond);
1988 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1989 cpu->cpu_index);
1990 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1991 cpu, QEMU_THREAD_JOINABLE);
1994 static void qemu_hvf_start_vcpu(CPUState *cpu)
1996 char thread_name[VCPU_THREAD_NAME_SIZE];
1998 /* HVF currently does not support TCG, and only runs in
1999 * unrestricted-guest mode. */
2000 assert(hvf_enabled());
2002 cpu->thread = g_malloc0(sizeof(QemuThread));
2003 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2004 qemu_cond_init(cpu->halt_cond);
2006 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2007 cpu->cpu_index);
2008 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2009 cpu, QEMU_THREAD_JOINABLE);
2012 static void qemu_whpx_start_vcpu(CPUState *cpu)
2014 char thread_name[VCPU_THREAD_NAME_SIZE];
2016 cpu->thread = g_malloc0(sizeof(QemuThread));
2017 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2018 qemu_cond_init(cpu->halt_cond);
2019 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2020 cpu->cpu_index);
2021 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2022 cpu, QEMU_THREAD_JOINABLE);
2023 #ifdef _WIN32
2024 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2025 #endif
2028 static void qemu_dummy_start_vcpu(CPUState *cpu)
2030 char thread_name[VCPU_THREAD_NAME_SIZE];
2032 cpu->thread = g_malloc0(sizeof(QemuThread));
2033 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2034 qemu_cond_init(cpu->halt_cond);
2035 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2036 cpu->cpu_index);
2037 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2038 QEMU_THREAD_JOINABLE);
2041 void qemu_init_vcpu(CPUState *cpu)
2043 cpu->nr_cores = smp_cores;
2044 cpu->nr_threads = smp_threads;
2045 cpu->stopped = true;
2047 if (!cpu->as) {
2048 /* If the target cpu hasn't set up any address spaces itself,
2049 * give it the default one.
2051 cpu->num_ases = 1;
2052 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2055 if (kvm_enabled()) {
2056 qemu_kvm_start_vcpu(cpu);
2057 } else if (hax_enabled()) {
2058 qemu_hax_start_vcpu(cpu);
2059 } else if (hvf_enabled()) {
2060 qemu_hvf_start_vcpu(cpu);
2061 } else if (tcg_enabled()) {
2062 qemu_tcg_init_vcpu(cpu);
2063 } else if (whpx_enabled()) {
2064 qemu_whpx_start_vcpu(cpu);
2065 } else {
2066 qemu_dummy_start_vcpu(cpu);
2069 while (!cpu->created) {
2070 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2074 void cpu_stop_current(void)
2076 if (current_cpu) {
2077 qemu_cpu_stop(current_cpu, true);
2081 int vm_stop(RunState state)
2083 if (qemu_in_vcpu_thread()) {
2084 qemu_system_vmstop_request_prepare();
2085 qemu_system_vmstop_request(state);
2087 * FIXME: should not return to device code in case
2088 * vm_stop() has been requested.
2090 cpu_stop_current();
2091 return 0;
2094 return do_vm_stop(state, true);
2098 * Prepare for (re)starting the VM.
2099 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2100 * running or in case of an error condition), 0 otherwise.
2102 int vm_prepare_start(void)
2104 RunState requested;
2106 qemu_vmstop_requested(&requested);
2107 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2108 return -1;
2111 /* Ensure that a STOP/RESUME pair of events is emitted if a
2112 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2113 * example, according to documentation is always followed by
2114 * the STOP event.
2116 if (runstate_is_running()) {
2117 qapi_event_send_stop();
2118 qapi_event_send_resume();
2119 return -1;
2122 /* We are sending this now, but the CPUs will be resumed shortly afterwards */
2123 qapi_event_send_resume();
2125 replay_enable_events();
2126 cpu_enable_ticks();
2127 runstate_set(RUN_STATE_RUNNING);
2128 vm_state_notify(1, RUN_STATE_RUNNING);
2129 return 0;
2132 void vm_start(void)
2134 if (!vm_prepare_start()) {
2135 resume_all_vcpus();
2139 /* does a state transition even if the VM is already stopped;
2140 the current state is forgotten forever */
2141 int vm_stop_force_state(RunState state)
2143 if (runstate_is_running()) {
2144 return vm_stop(state);
2145 } else {
2146 runstate_set(state);
2148 bdrv_drain_all();
2149 /* Make sure to return an error if the flush in a previous vm_stop()
2150 * failed. */
2151 return bdrv_flush_all();
2155 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
2157 /* XXX: implement xxx_cpu_list for targets that still miss it */
2158 #if defined(cpu_list)
2159 cpu_list(f, cpu_fprintf);
2160 #endif
2163 CpuInfoList *qmp_query_cpus(Error **errp)
2165 MachineState *ms = MACHINE(qdev_get_machine());
2166 MachineClass *mc = MACHINE_GET_CLASS(ms);
2167 CpuInfoList *head = NULL, *cur_item = NULL;
2168 CPUState *cpu;
2170 CPU_FOREACH(cpu) {
2171 CpuInfoList *info;
2172 #if defined(TARGET_I386)
2173 X86CPU *x86_cpu = X86_CPU(cpu);
2174 CPUX86State *env = &x86_cpu->env;
2175 #elif defined(TARGET_PPC)
2176 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
2177 CPUPPCState *env = &ppc_cpu->env;
2178 #elif defined(TARGET_SPARC)
2179 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
2180 CPUSPARCState *env = &sparc_cpu->env;
2181 #elif defined(TARGET_RISCV)
2182 RISCVCPU *riscv_cpu = RISCV_CPU(cpu);
2183 CPURISCVState *env = &riscv_cpu->env;
2184 #elif defined(TARGET_MIPS)
2185 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
2186 CPUMIPSState *env = &mips_cpu->env;
2187 #elif defined(TARGET_TRICORE)
2188 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
2189 CPUTriCoreState *env = &tricore_cpu->env;
2190 #elif defined(TARGET_S390X)
2191 S390CPU *s390_cpu = S390_CPU(cpu);
2192 CPUS390XState *env = &s390_cpu->env;
2193 #endif
2195 cpu_synchronize_state(cpu);
2197 info = g_malloc0(sizeof(*info));
2198 info->value = g_malloc0(sizeof(*info->value));
2199 info->value->CPU = cpu->cpu_index;
2200 info->value->current = (cpu == first_cpu);
2201 info->value->halted = cpu->halted;
2202 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2203 info->value->thread_id = cpu->thread_id;
2204 #if defined(TARGET_I386)
2205 info->value->arch = CPU_INFO_ARCH_X86;
2206 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
2207 #elif defined(TARGET_PPC)
2208 info->value->arch = CPU_INFO_ARCH_PPC;
2209 info->value->u.ppc.nip = env->nip;
2210 #elif defined(TARGET_SPARC)
2211 info->value->arch = CPU_INFO_ARCH_SPARC;
2212 info->value->u.q_sparc.pc = env->pc;
2213 info->value->u.q_sparc.npc = env->npc;
2214 #elif defined(TARGET_MIPS)
2215 info->value->arch = CPU_INFO_ARCH_MIPS;
2216 info->value->u.q_mips.PC = env->active_tc.PC;
2217 #elif defined(TARGET_TRICORE)
2218 info->value->arch = CPU_INFO_ARCH_TRICORE;
2219 info->value->u.tricore.PC = env->PC;
2220 #elif defined(TARGET_S390X)
2221 info->value->arch = CPU_INFO_ARCH_S390;
2222 info->value->u.s390.cpu_state = env->cpu_state;
2223 #elif defined(TARGET_RISCV)
2224 info->value->arch = CPU_INFO_ARCH_RISCV;
2225 info->value->u.riscv.pc = env->pc;
2226 #else
2227 info->value->arch = CPU_INFO_ARCH_OTHER;
2228 #endif
2229 info->value->has_props = !!mc->cpu_index_to_instance_props;
2230 if (info->value->has_props) {
2231 CpuInstanceProperties *props;
2232 props = g_malloc0(sizeof(*props));
2233 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2234 info->value->props = props;
2237 /* XXX: waiting for the qapi to support GSList */
2238 if (!cur_item) {
2239 head = cur_item = info;
2240 } else {
2241 cur_item->next = info;
2242 cur_item = info;
2246 return head;
2249 static CpuInfoArch sysemu_target_to_cpuinfo_arch(SysEmuTarget target)
2252 * The @SysEmuTarget -> @CpuInfoArch mapping below is based on the
2253 * TARGET_ARCH -> TARGET_BASE_ARCH mapping in the "configure" script.
2255 switch (target) {
2256 case SYS_EMU_TARGET_I386:
2257 case SYS_EMU_TARGET_X86_64:
2258 return CPU_INFO_ARCH_X86;
2260 case SYS_EMU_TARGET_PPC:
2261 case SYS_EMU_TARGET_PPC64:
2262 return CPU_INFO_ARCH_PPC;
2264 case SYS_EMU_TARGET_SPARC:
2265 case SYS_EMU_TARGET_SPARC64:
2266 return CPU_INFO_ARCH_SPARC;
2268 case SYS_EMU_TARGET_MIPS:
2269 case SYS_EMU_TARGET_MIPSEL:
2270 case SYS_EMU_TARGET_MIPS64:
2271 case SYS_EMU_TARGET_MIPS64EL:
2272 return CPU_INFO_ARCH_MIPS;
2274 case SYS_EMU_TARGET_TRICORE:
2275 return CPU_INFO_ARCH_TRICORE;
2277 case SYS_EMU_TARGET_S390X:
2278 return CPU_INFO_ARCH_S390;
2280 case SYS_EMU_TARGET_RISCV32:
2281 case SYS_EMU_TARGET_RISCV64:
2282 return CPU_INFO_ARCH_RISCV;
2284 default:
2285 return CPU_INFO_ARCH_OTHER;
2289 static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu)
2291 #ifdef TARGET_S390X
2292 S390CPU *s390_cpu = S390_CPU(cpu);
2293 CPUS390XState *env = &s390_cpu->env;
2295 info->cpu_state = env->cpu_state;
2296 #else
2297 abort();
2298 #endif
2302 * fast means: we NEVER interrupt vCPU threads to retrieve
2303 * information from KVM.
2305 CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
2307 MachineState *ms = MACHINE(qdev_get_machine());
2308 MachineClass *mc = MACHINE_GET_CLASS(ms);
2309 CpuInfoFastList *head = NULL, *cur_item = NULL;
2310 SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, TARGET_NAME,
2311 -1, &error_abort);
2312 CPUState *cpu;
2314 CPU_FOREACH(cpu) {
2315 CpuInfoFastList *info = g_malloc0(sizeof(*info));
2316 info->value = g_malloc0(sizeof(*info->value));
2318 info->value->cpu_index = cpu->cpu_index;
2319 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2320 info->value->thread_id = cpu->thread_id;
2322 info->value->has_props = !!mc->cpu_index_to_instance_props;
2323 if (info->value->has_props) {
2324 CpuInstanceProperties *props;
2325 props = g_malloc0(sizeof(*props));
2326 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2327 info->value->props = props;
2330 info->value->arch = sysemu_target_to_cpuinfo_arch(target);
2331 info->value->target = target;
2332 if (target == SYS_EMU_TARGET_S390X) {
2333 cpustate_to_cpuinfo_s390(&info->value->u.s390x, cpu);
2336 if (!cur_item) {
2337 head = cur_item = info;
2338 } else {
2339 cur_item->next = info;
2340 cur_item = info;
2344 return head;
2347 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2348 bool has_cpu, int64_t cpu_index, Error **errp)
2350 FILE *f;
2351 uint32_t l;
2352 CPUState *cpu;
2353 uint8_t buf[1024];
2354 int64_t orig_addr = addr, orig_size = size;
2356 if (!has_cpu) {
2357 cpu_index = 0;
2360 cpu = qemu_get_cpu(cpu_index);
2361 if (cpu == NULL) {
2362 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2363 "a CPU number");
2364 return;
2367 f = fopen(filename, "wb");
2368 if (!f) {
2369 error_setg_file_open(errp, errno, filename);
2370 return;
2373 while (size != 0) {
2374 l = sizeof(buf);
2375 if (l > size)
2376 l = size;
2377 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2378 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2379 " specified", orig_addr, orig_size);
2380 goto exit;
2382 if (fwrite(buf, 1, l, f) != l) {
2383 error_setg(errp, QERR_IO_ERROR);
2384 goto exit;
2386 addr += l;
2387 size -= l;
2390 exit:
2391 fclose(f);
2394 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2395 Error **errp)
2397 FILE *f;
2398 uint32_t l;
2399 uint8_t buf[1024];
2401 f = fopen(filename, "wb");
2402 if (!f) {
2403 error_setg_file_open(errp, errno, filename);
2404 return;
2407 while (size != 0) {
2408 l = sizeof(buf);
2409 if (l > size)
2410 l = size;
2411 cpu_physical_memory_read(addr, buf, l);
2412 if (fwrite(buf, 1, l, f) != l) {
2413 error_setg(errp, QERR_IO_ERROR);
2414 goto exit;
2416 addr += l;
2417 size -= l;
2420 exit:
2421 fclose(f);
2424 void qmp_inject_nmi(Error **errp)
2426 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2429 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2431 if (!use_icount) {
2432 return;
2435 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
2436 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2437 if (icount_align_option) {
2438 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
2439 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
2440 } else {
2441 cpu_fprintf(f, "Max guest delay NA\n");
2442 cpu_fprintf(f, "Max guest advance NA\n");