cpus: protect all icount computation with seqlock
[qemu/ar7.git] / cpus.c
blob 3783651e6995c295034654d8ad714aac640ac6fb
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "qemu/osdep.h"
26 #include "qemu/config-file.h"
27 #include "cpu.h"
28 #include "monitor/monitor.h"
29 #include "qapi/error.h"
30 #include "qapi/qapi-commands-misc.h"
31 #include "qapi/qapi-events-run-state.h"
32 #include "qapi/qmp/qerror.h"
33 #include "qemu/error-report.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/block-backend.h"
36 #include "exec/gdbstub.h"
37 #include "sysemu/dma.h"
38 #include "sysemu/hw_accel.h"
39 #include "sysemu/kvm.h"
40 #include "sysemu/hax.h"
41 #include "sysemu/hvf.h"
42 #include "sysemu/whpx.h"
43 #include "exec/exec-all.h"
45 #include "qemu/thread.h"
46 #include "sysemu/cpus.h"
47 #include "sysemu/qtest.h"
48 #include "qemu/main-loop.h"
49 #include "qemu/option.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/seqlock.h"
52 #include "tcg.h"
53 #include "hw/nmi.h"
54 #include "sysemu/replay.h"
55 #include "hw/boards.h"
57 #ifdef CONFIG_LINUX
59 #include <sys/prctl.h>
61 #ifndef PR_MCE_KILL
62 #define PR_MCE_KILL 33
63 #endif
65 #ifndef PR_MCE_KILL_SET
66 #define PR_MCE_KILL_SET 1
67 #endif
69 #ifndef PR_MCE_KILL_EARLY
70 #define PR_MCE_KILL_EARLY 1
71 #endif
73 #endif /* CONFIG_LINUX */
75 int64_t max_delay;
76 int64_t max_advance;
78 /* vcpu throttling controls */
79 static QEMUTimer *throttle_timer;
80 static unsigned int throttle_percentage;
82 #define CPU_THROTTLE_PCT_MIN 1
83 #define CPU_THROTTLE_PCT_MAX 99
84 #define CPU_THROTTLE_TIMESLICE_NS 10000000
86 bool cpu_is_stopped(CPUState *cpu)
88 return cpu->stopped || !runstate_is_running();
91 static bool cpu_thread_is_idle(CPUState *cpu)
93 if (cpu->stop || cpu->queued_work_first) {
94 return false;
96 if (cpu_is_stopped(cpu)) {
97 return true;
99 if (!cpu->halted || cpu_has_work(cpu) ||
100 kvm_halt_in_kernel()) {
101 return false;
103 return true;
106 static bool all_cpu_threads_idle(void)
108 CPUState *cpu;
110 CPU_FOREACH(cpu) {
111 if (!cpu_thread_is_idle(cpu)) {
112 return false;
115 return true;
118 /***********************************************************/
119 /* guest cycle counter */
121 /* Protected by TimersState seqlock */
123 static bool icount_sleep = true;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
125 #define MAX_ICOUNT_SHIFT 10
127 typedef struct TimersState {
128 /* Protected by BQL. */
129 int64_t cpu_ticks_prev;
130 int64_t cpu_ticks_offset;
132 /* cpu_clock_offset can be read outside the BQL, so protect it with
133 * this lock.
135 QemuSeqLock vm_clock_seqlock;
136 int64_t cpu_clock_offset;
137 int32_t cpu_ticks_enabled;
139 /* Conversion factor from emulated instructions to virtual clock ticks. */
140 int icount_time_shift;
141 /* Compensate for varying guest execution speed. */
142 int64_t qemu_icount_bias;
143 /* Only written by TCG thread */
144 int64_t qemu_icount;
145 /* for adjusting icount */
146 int64_t vm_clock_warp_start;
147 QEMUTimer *icount_rt_timer;
148 QEMUTimer *icount_vm_timer;
149 QEMUTimer *icount_warp_timer;
150 } TimersState;
152 static TimersState timers_state;
153 bool mttcg_enabled;
156 * We default to false if we know other options have been enabled
157 * which are currently incompatible with MTTCG. Otherwise when each
158 * guest (target) has been updated to support:
159 * - atomic instructions
160 * - memory ordering primitives (barriers)
161 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
163 * Once a guest architecture has been converted to the new primitives
164 * there are two remaining limitations to check.
166 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
167 * - The host must have a stronger memory order than the guest
169 * It may be possible in future to support strong guests on weak hosts
170 * but that will require tagging all load/stores in a guest with their
171 * implicit memory order requirements which would likely slow things
172 * down a lot.
175 static bool check_tcg_memory_orders_compatible(void)
177 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
178 return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
179 #else
180 return false;
181 #endif
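/*
 * Worked reading of the check above (explanatory note, not a guarantee
 * beyond what the expression itself states): the result is zero only
 * when every ordering bit the guest relies on (TCG_GUEST_DEFAULT_MO)
 * is also set in what the host backend provides (TCG_TARGET_DEFAULT_MO).
 * Any guest-required bit missing from the host makes the expression
 * non-zero, so MTTCG is not enabled by default.
 */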
184 static bool default_mttcg_enabled(void)
186 if (use_icount || TCG_OVERSIZED_GUEST) {
187 return false;
188 } else {
189 #ifdef TARGET_SUPPORTS_MTTCG
190 return check_tcg_memory_orders_compatible();
191 #else
192 return false;
193 #endif
197 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
199 const char *t = qemu_opt_get(opts, "thread");
200 if (t) {
201 if (strcmp(t, "multi") == 0) {
202 if (TCG_OVERSIZED_GUEST) {
203 error_setg(errp, "No MTTCG when guest word size > host's");
204 } else if (use_icount) {
205 error_setg(errp, "No MTTCG when icount is enabled");
206 } else {
207 #ifndef TARGET_SUPPORTS_MTTCG
208 error_report("Guest not yet converted to MTTCG - "
209 "you may get unexpected results");
210 #endif
211 if (!check_tcg_memory_orders_compatible()) {
212 error_report("Guest expects a stronger memory ordering "
213 "than the host provides");
214 error_printf("This may cause strange/hard to debug errors\n");
216 mttcg_enabled = true;
218 } else if (strcmp(t, "single") == 0) {
219 mttcg_enabled = false;
220 } else {
221 error_setg(errp, "Invalid 'thread' setting %s", t);
223 } else {
224 mttcg_enabled = default_mttcg_enabled();
228 /* The current number of executed instructions is based on what we
229 * originally budgeted minus the current state of the decrementing
230 * icount counters in extra/u16.low.
232 static int64_t cpu_get_icount_executed(CPUState *cpu)
234 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
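/*
 * Worked example with hypothetical numbers: if prepare_icount_for_run()
 * (later in this file) set icount_budget = 20000, then u16.low starts
 * at 20000 and icount_extra at 0. Once the decrementer has counted
 * down to 7000, this function reports 20000 - (7000 + 0) = 13000
 * instructions executed so far.
 */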
238 * Update the global shared timers_state.qemu_icount to take into
239 * account executed instructions. This is done by the TCG vCPU
240 * thread so the main-loop can see time has moved forward.
242 void cpu_update_icount(CPUState *cpu)
244 int64_t executed = cpu_get_icount_executed(cpu);
245 cpu->icount_budget -= executed;
247 #ifdef CONFIG_ATOMIC64
248 atomic_set__nocheck(&timers_state.qemu_icount,
249 timers_state.qemu_icount + executed);
250 #else /* FIXME: we need 64bit atomics to do this safely */
251 timers_state.qemu_icount += executed;
252 #endif
255 static int64_t cpu_get_icount_raw_locked(void)
257 CPUState *cpu = current_cpu;
259 if (cpu && cpu->running) {
260 if (!cpu->can_do_io) {
261 error_report("Bad icount read");
262 exit(1);
264 /* Take into account what has run */
265 cpu_update_icount(cpu);
267 /* The read is protected by the seqlock, so __nocheck is okay. */
268 return atomic_read__nocheck(&timers_state.qemu_icount);
271 static int64_t cpu_get_icount_locked(void)
273 int64_t icount = cpu_get_icount_raw_locked();
274 return atomic_read__nocheck(&timers_state.qemu_icount_bias) + cpu_icount_to_ns(icount);
277 int64_t cpu_get_icount_raw(void)
279 int64_t icount;
280 unsigned start;
282 do {
283 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
284 icount = cpu_get_icount_raw_locked();
285 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
287 return icount;
290 /* Return the virtual CPU time, based on the instruction counter. */
291 int64_t cpu_get_icount(void)
293 int64_t icount;
294 unsigned start;
296 do {
297 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
298 icount = cpu_get_icount_locked();
299 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
301 return icount;
304 int64_t cpu_icount_to_ns(int64_t icount)
306 return icount << atomic_read(&timers_state.icount_time_shift);
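/*
 * Worked example with hypothetical numbers: configure_icount() starts
 * icount_time_shift at 3, so each instruction accounts for 1 << 3 = 8 ns
 * of virtual time (roughly 125 MIPS), and cpu_icount_to_ns(1000) is
 * 1000 << 3 = 8000 ns. icount_adjust() retunes the shift between 0 and
 * MAX_ICOUNT_SHIFT at run time.
 */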
309 /* return the time elapsed in VM between vm_start and vm_stop. Unless
310 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
311 * counter.
313 * Caller must hold the BQL
315 int64_t cpu_get_ticks(void)
317 int64_t ticks;
319 if (use_icount) {
320 return cpu_get_icount();
323 ticks = timers_state.cpu_ticks_offset;
324 if (timers_state.cpu_ticks_enabled) {
325 ticks += cpu_get_host_ticks();
328 if (timers_state.cpu_ticks_prev > ticks) {
329 /* Note: non-increasing ticks may happen if the host uses
330 software suspend */
331 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
332 ticks = timers_state.cpu_ticks_prev;
335 timers_state.cpu_ticks_prev = ticks;
336 return ticks;
339 static int64_t cpu_get_clock_locked(void)
341 int64_t time;
343 time = timers_state.cpu_clock_offset;
344 if (timers_state.cpu_ticks_enabled) {
345 time += get_clock();
348 return time;
351 /* Return the monotonic time elapsed in VM, i.e.,
352 * the time between vm_start and vm_stop
354 int64_t cpu_get_clock(void)
356 int64_t ti;
357 unsigned start;
359 do {
360 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
361 ti = cpu_get_clock_locked();
362 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
364 return ti;
367 /* enable cpu_get_ticks()
368 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
370 void cpu_enable_ticks(void)
372 /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
373 seqlock_write_begin(&timers_state.vm_clock_seqlock);
374 if (!timers_state.cpu_ticks_enabled) {
375 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
376 timers_state.cpu_clock_offset -= get_clock();
377 timers_state.cpu_ticks_enabled = 1;
379 seqlock_write_end(&timers_state.vm_clock_seqlock);
382 /* disable cpu_get_ticks() : the clock is stopped. You must not call
383 * cpu_get_ticks() after that.
384 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
386 void cpu_disable_ticks(void)
388 /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
389 seqlock_write_begin(&timers_state.vm_clock_seqlock);
390 if (timers_state.cpu_ticks_enabled) {
391 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
392 timers_state.cpu_clock_offset = cpu_get_clock_locked();
393 timers_state.cpu_ticks_enabled = 0;
395 seqlock_write_end(&timers_state.vm_clock_seqlock);
398 /* Correlation between real and virtual time is always going to be
399 fairly approximate, so ignore small variation.
400 When the guest is idle real and virtual time will be aligned in
401 the IO wait loop. */
402 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
404 static void icount_adjust(void)
406 int64_t cur_time;
407 int64_t cur_icount;
408 int64_t delta;
410 /* Protected by TimersState mutex. */
411 static int64_t last_delta;
413 /* If the VM is not running, then do nothing. */
414 if (!runstate_is_running()) {
415 return;
418 seqlock_write_begin(&timers_state.vm_clock_seqlock);
419 cur_time = cpu_get_clock_locked();
420 cur_icount = cpu_get_icount_locked();
422 delta = cur_icount - cur_time;
423 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
424 if (delta > 0
425 && last_delta + ICOUNT_WOBBLE < delta * 2
426 && timers_state.icount_time_shift > 0) {
427 /* The guest is getting too far ahead. Slow time down. */
428 atomic_set(&timers_state.icount_time_shift,
429 timers_state.icount_time_shift - 1);
431 if (delta < 0
432 && last_delta - ICOUNT_WOBBLE > delta * 2
433 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
434 /* The guest is getting too far behind. Speed time up. */
435 atomic_set(&timers_state.icount_time_shift,
436 timers_state.icount_time_shift + 1);
438 last_delta = delta;
439 atomic_set__nocheck(&timers_state.qemu_icount_bias,
440 cur_icount - (timers_state.qemu_icount
441 << timers_state.icount_time_shift));
442 seqlock_write_end(&timers_state.vm_clock_seqlock);
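/*
 * Note on the bias update above: because qemu_icount_bias is recomputed
 * against the (possibly changed) shift, cpu_get_icount_locked(), which
 * returns qemu_icount_bias + (qemu_icount << icount_time_shift), still
 * yields cur_icount right after the adjustment; the virtual clock stays
 * continuous and only its rate changes.
 */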
445 static void icount_adjust_rt(void *opaque)
447 timer_mod(timers_state.icount_rt_timer,
448 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
449 icount_adjust();
452 static void icount_adjust_vm(void *opaque)
454 timer_mod(timers_state.icount_vm_timer,
455 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
456 NANOSECONDS_PER_SECOND / 10);
457 icount_adjust();
460 static int64_t qemu_icount_round(int64_t count)
462 int shift = atomic_read(&timers_state.icount_time_shift);
463 return (count + (1 << shift) - 1) >> shift;
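/*
 * Worked example with hypothetical numbers: with shift == 3 (8 ns per
 * instruction), qemu_icount_round(100) is (100 + 8 - 1) >> 3 = 13, and
 * 13 * 8 = 104 ns, so the rounded-up instruction budget always covers
 * the full nanosecond deadline.
 */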
466 static void icount_warp_rt(void)
468 unsigned seq;
469 int64_t warp_start;
471 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
472 * changes from -1 to another value, so the race here is okay.
474 do {
475 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
476 warp_start = timers_state.vm_clock_warp_start;
477 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
479 if (warp_start == -1) {
480 return;
483 seqlock_write_begin(&timers_state.vm_clock_seqlock);
484 if (runstate_is_running()) {
485 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
486 cpu_get_clock_locked());
487 int64_t warp_delta;
489 warp_delta = clock - timers_state.vm_clock_warp_start;
490 if (use_icount == 2) {
492 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
493 * far ahead of real time.
495 int64_t cur_icount = cpu_get_icount_locked();
496 int64_t delta = clock - cur_icount;
497 warp_delta = MIN(warp_delta, delta);
499 atomic_set__nocheck(&timers_state.qemu_icount_bias,
500 timers_state.qemu_icount_bias + warp_delta);
502 timers_state.vm_clock_warp_start = -1;
503 seqlock_write_end(&timers_state.vm_clock_seqlock);
505 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
506 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
510 static void icount_timer_cb(void *opaque)
512 /* No need for a checkpoint because the timer already synchronizes
513 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
515 icount_warp_rt();
518 void qtest_clock_warp(int64_t dest)
520 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
521 AioContext *aio_context;
522 assert(qtest_enabled());
523 aio_context = qemu_get_aio_context();
524 while (clock < dest) {
525 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
526 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
528 seqlock_write_begin(&timers_state.vm_clock_seqlock);
529 atomic_set__nocheck(&timers_state.qemu_icount_bias,
530 timers_state.qemu_icount_bias + warp);
531 seqlock_write_end(&timers_state.vm_clock_seqlock);
533 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
534 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
535 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
537 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
540 void qemu_start_warp_timer(void)
542 int64_t clock;
543 int64_t deadline;
545 if (!use_icount) {
546 return;
549 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
550 * do not fire, so computing the deadline does not make sense.
552 if (!runstate_is_running()) {
553 return;
556 /* warp clock deterministically in record/replay mode */
557 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
558 return;
561 if (!all_cpu_threads_idle()) {
562 return;
565 if (qtest_enabled()) {
566 /* When testing, qtest commands advance icount. */
567 return;
570 /* We want to use the earliest deadline from ALL vm_clocks */
571 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
572 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
573 if (deadline < 0) {
574 static bool notified;
575 if (!icount_sleep && !notified) {
576 warn_report("icount sleep disabled and no active timers");
577 notified = true;
579 return;
582 if (deadline > 0) {
584 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
585 * sleep. Otherwise, the CPU might be waiting for a future timer
586 * interrupt to wake it up, but the interrupt never comes because
587 * the vCPU isn't running any insns and thus doesn't advance the
588 * QEMU_CLOCK_VIRTUAL.
590 if (!icount_sleep) {
592 * We never let VCPUs sleep in no sleep icount mode.
593 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
594 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
595 * It is useful when we want a deterministic execution time,
596 * isolated from host latencies.
598 seqlock_write_begin(&timers_state.vm_clock_seqlock);
599 atomic_set__nocheck(&timers_state.qemu_icount_bias,
600 timers_state.qemu_icount_bias + deadline);
601 seqlock_write_end(&timers_state.vm_clock_seqlock);
602 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
603 } else {
605 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
606 * "real" time (related to the time left until the next event) has
607 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
608 * This keeps the warps from being visible externally; for example,
609 * you will not be sending network packets continuously instead of
610 * every 100ms.
612 seqlock_write_begin(&timers_state.vm_clock_seqlock);
613 if (timers_state.vm_clock_warp_start == -1
614 || timers_state.vm_clock_warp_start > clock) {
615 timers_state.vm_clock_warp_start = clock;
617 seqlock_write_end(&timers_state.vm_clock_seqlock);
618 timer_mod_anticipate(timers_state.icount_warp_timer,
619 clock + deadline);
621 } else if (deadline == 0) {
622 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
626 static void qemu_account_warp_timer(void)
628 if (!use_icount || !icount_sleep) {
629 return;
632 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
633 * do not fire, so computing the deadline does not make sense.
635 if (!runstate_is_running()) {
636 return;
639 /* warp clock deterministically in record/replay mode */
640 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
641 return;
644 timer_del(timers_state.icount_warp_timer);
645 icount_warp_rt();
648 static bool icount_state_needed(void *opaque)
650 return use_icount;
653 static bool warp_timer_state_needed(void *opaque)
655 TimersState *s = opaque;
656 return s->icount_warp_timer != NULL;
659 static bool adjust_timers_state_needed(void *opaque)
661 TimersState *s = opaque;
662 return s->icount_rt_timer != NULL;
666 * Subsection for warp timer migration is optional, because it may not be created
668 static const VMStateDescription icount_vmstate_warp_timer = {
669 .name = "timer/icount/warp_timer",
670 .version_id = 1,
671 .minimum_version_id = 1,
672 .needed = warp_timer_state_needed,
673 .fields = (VMStateField[]) {
674 VMSTATE_INT64(vm_clock_warp_start, TimersState),
675 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
676 VMSTATE_END_OF_LIST()
680 static const VMStateDescription icount_vmstate_adjust_timers = {
681 .name = "timer/icount/timers",
682 .version_id = 1,
683 .minimum_version_id = 1,
684 .needed = adjust_timers_state_needed,
685 .fields = (VMStateField[]) {
686 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
687 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
688 VMSTATE_END_OF_LIST()
693 * This is a subsection for icount migration.
695 static const VMStateDescription icount_vmstate_timers = {
696 .name = "timer/icount",
697 .version_id = 1,
698 .minimum_version_id = 1,
699 .needed = icount_state_needed,
700 .fields = (VMStateField[]) {
701 VMSTATE_INT64(qemu_icount_bias, TimersState),
702 VMSTATE_INT64(qemu_icount, TimersState),
703 VMSTATE_END_OF_LIST()
705 .subsections = (const VMStateDescription*[]) {
706 &icount_vmstate_warp_timer,
707 &icount_vmstate_adjust_timers,
708 NULL
712 static const VMStateDescription vmstate_timers = {
713 .name = "timer",
714 .version_id = 2,
715 .minimum_version_id = 1,
716 .fields = (VMStateField[]) {
717 VMSTATE_INT64(cpu_ticks_offset, TimersState),
718 VMSTATE_UNUSED(8),
719 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
720 VMSTATE_END_OF_LIST()
722 .subsections = (const VMStateDescription*[]) {
723 &icount_vmstate_timers,
724 NULL
728 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
730 double pct;
731 double throttle_ratio;
732 long sleeptime_ns;
734 if (!cpu_throttle_get_percentage()) {
735 return;
738 pct = (double)cpu_throttle_get_percentage()/100;
739 throttle_ratio = pct / (1 - pct);
740 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
742 qemu_mutex_unlock_iothread();
743 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
744 qemu_mutex_lock_iothread();
745 atomic_set(&cpu->throttle_thread_scheduled, 0);
748 static void cpu_throttle_timer_tick(void *opaque)
750 CPUState *cpu;
751 double pct;
753 /* Stop the timer if needed */
754 if (!cpu_throttle_get_percentage()) {
755 return;
757 CPU_FOREACH(cpu) {
758 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
759 async_run_on_cpu(cpu, cpu_throttle_thread,
760 RUN_ON_CPU_NULL);
764 pct = (double)cpu_throttle_get_percentage()/100;
765 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
766 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
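/*
 * Worked example with hypothetical numbers: at a throttle percentage of
 * 75, pct = 0.75 and throttle_ratio = 0.75 / 0.25 = 3, so each scheduled
 * cpu_throttle_thread() sleeps 3 * CPU_THROTTLE_TIMESLICE_NS = 30 ms,
 * while the timer above re-fires every 10 ms / (1 - 0.75) = 40 ms; the
 * vCPU therefore runs for roughly (1 - pct) of wall-clock time.
 */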
769 void cpu_throttle_set(int new_throttle_pct)
771 /* Ensure throttle percentage is within valid range */
772 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
773 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
775 atomic_set(&throttle_percentage, new_throttle_pct);
777 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
778 CPU_THROTTLE_TIMESLICE_NS);
781 void cpu_throttle_stop(void)
783 atomic_set(&throttle_percentage, 0);
786 bool cpu_throttle_active(void)
788 return (cpu_throttle_get_percentage() != 0);
791 int cpu_throttle_get_percentage(void)
793 return atomic_read(&throttle_percentage);
796 void cpu_ticks_init(void)
798 seqlock_init(&timers_state.vm_clock_seqlock);
799 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
800 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
801 cpu_throttle_timer_tick, NULL);
804 void configure_icount(QemuOpts *opts, Error **errp)
806 const char *option;
807 char *rem_str = NULL;
809 option = qemu_opt_get(opts, "shift");
810 if (!option) {
811 if (qemu_opt_get(opts, "align") != NULL) {
812 error_setg(errp, "Please specify shift option when using align");
814 return;
817 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
818 if (icount_sleep) {
819 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
820 icount_timer_cb, NULL);
823 icount_align_option = qemu_opt_get_bool(opts, "align", false);
825 if (icount_align_option && !icount_sleep) {
826 error_setg(errp, "align=on and sleep=off are incompatible");
828 if (strcmp(option, "auto") != 0) {
829 errno = 0;
830 timers_state.icount_time_shift = strtol(option, &rem_str, 0);
831 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
832 error_setg(errp, "icount: Invalid shift value");
834 use_icount = 1;
835 return;
836 } else if (icount_align_option) {
837 error_setg(errp, "shift=auto and align=on are incompatible");
838 } else if (!icount_sleep) {
839 error_setg(errp, "shift=auto and sleep=off are incompatible");
842 use_icount = 2;
844 /* 125MIPS seems a reasonable initial guess at the guest speed.
845 It will be corrected fairly quickly anyway. */
846 timers_state.icount_time_shift = 3;
848 /* Have both realtime and virtual time triggers for speed adjustment.
849 The realtime trigger catches emulated time passing too slowly,
850 the virtual time trigger catches emulated time passing too fast.
851 Realtime triggers occur even when idle, so use them less frequently
852 than VM triggers. */
853 timers_state.vm_clock_warp_start = -1;
854 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
855 icount_adjust_rt, NULL);
856 timer_mod(timers_state.icount_rt_timer,
857 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
858 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
859 icount_adjust_vm, NULL);
860 timer_mod(timers_state.icount_vm_timer,
861 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
862 NANOSECONDS_PER_SECOND / 10);
865 /***********************************************************/
866 /* TCG vCPU kick timer
868 * The kick timer is responsible for moving single-threaded vCPU
869 * emulation on to the next vCPU. If more than one vCPU is running, a
870 * timer event will force a cpu->exit so the next vCPU can get
871 * scheduled.
873 * The timer is removed while all vCPUs are idle and restarted
874 * once they are no longer idle.
877 static QEMUTimer *tcg_kick_vcpu_timer;
878 static CPUState *tcg_current_rr_cpu;
880 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
882 static inline int64_t qemu_tcg_next_kick(void)
884 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
887 /* Kick the currently round-robin scheduled vCPU */
888 static void qemu_cpu_kick_rr_cpu(void)
890 CPUState *cpu;
891 do {
892 cpu = atomic_mb_read(&tcg_current_rr_cpu);
893 if (cpu) {
894 cpu_exit(cpu);
896 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
899 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
903 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
905 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
906 qemu_notify_event();
907 return;
910 if (qemu_in_vcpu_thread()) {
911 /* A CPU is currently running; kick it back out to the
912 * tcg_cpu_exec() loop so it will recalculate its
913 * icount deadline immediately.
915 qemu_cpu_kick(current_cpu);
916 } else if (first_cpu) {
917 /* qemu_cpu_kick is not enough to kick a halted CPU out of
918 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
919 * causes cpu_thread_is_idle to return false. This way,
920 * handle_icount_deadline can run.
921 * If we have no CPUs at all for some reason, we don't
922 * need to do anything.
924 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
928 static void kick_tcg_thread(void *opaque)
930 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
931 qemu_cpu_kick_rr_cpu();
934 static void start_tcg_kick_timer(void)
936 assert(!mttcg_enabled);
937 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
938 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
939 kick_tcg_thread, NULL);
940 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
944 static void stop_tcg_kick_timer(void)
946 assert(!mttcg_enabled);
947 if (tcg_kick_vcpu_timer) {
948 timer_del(tcg_kick_vcpu_timer);
949 tcg_kick_vcpu_timer = NULL;
953 /***********************************************************/
954 void hw_error(const char *fmt, ...)
956 va_list ap;
957 CPUState *cpu;
959 va_start(ap, fmt);
960 fprintf(stderr, "qemu: hardware error: ");
961 vfprintf(stderr, fmt, ap);
962 fprintf(stderr, "\n");
963 CPU_FOREACH(cpu) {
964 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
965 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
967 va_end(ap);
968 abort();
971 void cpu_synchronize_all_states(void)
973 CPUState *cpu;
975 CPU_FOREACH(cpu) {
976 cpu_synchronize_state(cpu);
977 /* TODO: move to cpu_synchronize_state() */
978 if (hvf_enabled()) {
979 hvf_cpu_synchronize_state(cpu);
984 void cpu_synchronize_all_post_reset(void)
986 CPUState *cpu;
988 CPU_FOREACH(cpu) {
989 cpu_synchronize_post_reset(cpu);
990 /* TODO: move to cpu_synchronize_post_reset() */
991 if (hvf_enabled()) {
992 hvf_cpu_synchronize_post_reset(cpu);
997 void cpu_synchronize_all_post_init(void)
999 CPUState *cpu;
1001 CPU_FOREACH(cpu) {
1002 cpu_synchronize_post_init(cpu);
1003 /* TODO: move to cpu_synchronize_post_init() */
1004 if (hvf_enabled()) {
1005 hvf_cpu_synchronize_post_init(cpu);
1010 void cpu_synchronize_all_pre_loadvm(void)
1012 CPUState *cpu;
1014 CPU_FOREACH(cpu) {
1015 cpu_synchronize_pre_loadvm(cpu);
1019 static int do_vm_stop(RunState state, bool send_stop)
1021 int ret = 0;
1023 if (runstate_is_running()) {
1024 cpu_disable_ticks();
1025 pause_all_vcpus();
1026 runstate_set(state);
1027 vm_state_notify(0, state);
1028 if (send_stop) {
1029 qapi_event_send_stop(&error_abort);
1033 bdrv_drain_all();
1034 replay_disable_events();
1035 ret = bdrv_flush_all();
1037 return ret;
1040 /* Special vm_stop() variant for terminating the process. Historically clients
1041 * did not expect a QMP STOP event and so we need to retain compatibility.
1043 int vm_shutdown(void)
1045 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
1048 static bool cpu_can_run(CPUState *cpu)
1050 if (cpu->stop) {
1051 return false;
1053 if (cpu_is_stopped(cpu)) {
1054 return false;
1056 return true;
1059 static void cpu_handle_guest_debug(CPUState *cpu)
1061 gdb_set_stop_cpu(cpu);
1062 qemu_system_debug_request();
1063 cpu->stopped = true;
1066 #ifdef CONFIG_LINUX
1067 static void sigbus_reraise(void)
1069 sigset_t set;
1070 struct sigaction action;
1072 memset(&action, 0, sizeof(action));
1073 action.sa_handler = SIG_DFL;
1074 if (!sigaction(SIGBUS, &action, NULL)) {
1075 raise(SIGBUS);
1076 sigemptyset(&set);
1077 sigaddset(&set, SIGBUS);
1078 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1080 perror("Failed to re-raise SIGBUS!\n");
1081 abort();
1084 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1086 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1087 sigbus_reraise();
1090 if (current_cpu) {
1091 /* Called asynchronously in VCPU thread. */
1092 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1093 sigbus_reraise();
1095 } else {
1096 /* Called synchronously (via signalfd) in main thread. */
1097 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1098 sigbus_reraise();
1103 static void qemu_init_sigbus(void)
1105 struct sigaction action;
1107 memset(&action, 0, sizeof(action));
1108 action.sa_flags = SA_SIGINFO;
1109 action.sa_sigaction = sigbus_handler;
1110 sigaction(SIGBUS, &action, NULL);
1112 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1114 #else /* !CONFIG_LINUX */
1115 static void qemu_init_sigbus(void)
1118 #endif /* !CONFIG_LINUX */
1120 static QemuMutex qemu_global_mutex;
1122 static QemuThread io_thread;
1124 /* cpu creation */
1125 static QemuCond qemu_cpu_cond;
1126 /* system init */
1127 static QemuCond qemu_pause_cond;
1129 void qemu_init_cpu_loop(void)
1131 qemu_init_sigbus();
1132 qemu_cond_init(&qemu_cpu_cond);
1133 qemu_cond_init(&qemu_pause_cond);
1134 qemu_mutex_init(&qemu_global_mutex);
1136 qemu_thread_get_self(&io_thread);
1139 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1141 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1144 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1146 if (kvm_destroy_vcpu(cpu) < 0) {
1147 error_report("kvm_destroy_vcpu failed");
1148 exit(EXIT_FAILURE);
1152 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1156 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1158 g_assert(qemu_cpu_is_self(cpu));
1159 cpu->stop = false;
1160 cpu->stopped = true;
1161 if (exit) {
1162 cpu_exit(cpu);
1164 qemu_cond_broadcast(&qemu_pause_cond);
1167 static void qemu_wait_io_event_common(CPUState *cpu)
1169 atomic_mb_set(&cpu->thread_kicked, false);
1170 if (cpu->stop) {
1171 qemu_cpu_stop(cpu, false);
1173 process_queued_cpu_work(cpu);
1176 static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
1178 while (all_cpu_threads_idle()) {
1179 stop_tcg_kick_timer();
1180 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1183 start_tcg_kick_timer();
1185 qemu_wait_io_event_common(cpu);
1188 static void qemu_wait_io_event(CPUState *cpu)
1190 while (cpu_thread_is_idle(cpu)) {
1191 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1194 #ifdef _WIN32
1195 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1196 if (!tcg_enabled()) {
1197 SleepEx(0, TRUE);
1199 #endif
1200 qemu_wait_io_event_common(cpu);
1203 static void *qemu_kvm_cpu_thread_fn(void *arg)
1205 CPUState *cpu = arg;
1206 int r;
1208 rcu_register_thread();
1210 qemu_mutex_lock_iothread();
1211 qemu_thread_get_self(cpu->thread);
1212 cpu->thread_id = qemu_get_thread_id();
1213 cpu->can_do_io = 1;
1214 current_cpu = cpu;
1216 r = kvm_init_vcpu(cpu);
1217 if (r < 0) {
1218 error_report("kvm_init_vcpu failed: %s", strerror(-r));
1219 exit(1);
1222 kvm_init_cpu_signals(cpu);
1224 /* signal CPU creation */
1225 cpu->created = true;
1226 qemu_cond_signal(&qemu_cpu_cond);
1228 do {
1229 if (cpu_can_run(cpu)) {
1230 r = kvm_cpu_exec(cpu);
1231 if (r == EXCP_DEBUG) {
1232 cpu_handle_guest_debug(cpu);
1235 qemu_wait_io_event(cpu);
1236 } while (!cpu->unplug || cpu_can_run(cpu));
1238 qemu_kvm_destroy_vcpu(cpu);
1239 cpu->created = false;
1240 qemu_cond_signal(&qemu_cpu_cond);
1241 qemu_mutex_unlock_iothread();
1242 rcu_unregister_thread();
1243 return NULL;
1246 static void *qemu_dummy_cpu_thread_fn(void *arg)
1248 #ifdef _WIN32
1249 error_report("qtest is not supported under Windows");
1250 exit(1);
1251 #else
1252 CPUState *cpu = arg;
1253 sigset_t waitset;
1254 int r;
1256 rcu_register_thread();
1258 qemu_mutex_lock_iothread();
1259 qemu_thread_get_self(cpu->thread);
1260 cpu->thread_id = qemu_get_thread_id();
1261 cpu->can_do_io = 1;
1262 current_cpu = cpu;
1264 sigemptyset(&waitset);
1265 sigaddset(&waitset, SIG_IPI);
1267 /* signal CPU creation */
1268 cpu->created = true;
1269 qemu_cond_signal(&qemu_cpu_cond);
1271 do {
1272 qemu_mutex_unlock_iothread();
1273 do {
1274 int sig;
1275 r = sigwait(&waitset, &sig);
1276 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1277 if (r == -1) {
1278 perror("sigwait");
1279 exit(1);
1281 qemu_mutex_lock_iothread();
1282 qemu_wait_io_event(cpu);
1283 } while (!cpu->unplug);
1285 rcu_unregister_thread();
1286 return NULL;
1287 #endif
1290 static int64_t tcg_get_icount_limit(void)
1292 int64_t deadline;
1294 if (replay_mode != REPLAY_MODE_PLAY) {
1295 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1297 /* Maintain prior (possibly buggy) behaviour where if no deadline
1298 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1299 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1300 * nanoseconds.
1302 if ((deadline < 0) || (deadline > INT32_MAX)) {
1303 deadline = INT32_MAX;
1306 return qemu_icount_round(deadline);
1307 } else {
1308 return replay_get_instructions();
1312 static void handle_icount_deadline(void)
1314 assert(qemu_in_vcpu_thread());
1315 if (use_icount) {
1316 int64_t deadline =
1317 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1319 if (deadline == 0) {
1320 /* Wake up other AioContexts. */
1321 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1322 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1327 static void prepare_icount_for_run(CPUState *cpu)
1329 if (use_icount) {
1330 int insns_left;
1332 /* These should always be cleared by process_icount_data after
1333 * each vCPU execution. However u16.high can be raised
1334 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1336 g_assert(cpu->icount_decr.u16.low == 0);
1337 g_assert(cpu->icount_extra == 0);
1339 cpu->icount_budget = tcg_get_icount_limit();
1340 insns_left = MIN(0xffff, cpu->icount_budget);
1341 cpu->icount_decr.u16.low = insns_left;
1342 cpu->icount_extra = cpu->icount_budget - insns_left;
1344 replay_mutex_lock();
1348 static void process_icount_data(CPUState *cpu)
1350 if (use_icount) {
1351 /* Account for executed instructions */
1352 cpu_update_icount(cpu);
1354 /* Reset the counters */
1355 cpu->icount_decr.u16.low = 0;
1356 cpu->icount_extra = 0;
1357 cpu->icount_budget = 0;
1359 replay_account_executed_instructions();
1361 replay_mutex_unlock();
1366 static int tcg_cpu_exec(CPUState *cpu)
1368 int ret;
1369 #ifdef CONFIG_PROFILER
1370 int64_t ti;
1371 #endif
1373 assert(tcg_enabled());
1374 #ifdef CONFIG_PROFILER
1375 ti = profile_getclock();
1376 #endif
1377 cpu_exec_start(cpu);
1378 ret = cpu_exec(cpu);
1379 cpu_exec_end(cpu);
1380 #ifdef CONFIG_PROFILER
1381 tcg_time += profile_getclock() - ti;
1382 #endif
1383 return ret;
1386 /* Destroy any remaining vCPUs which have been unplugged and have
1387 * finished running
1389 static void deal_with_unplugged_cpus(void)
1391 CPUState *cpu;
1393 CPU_FOREACH(cpu) {
1394 if (cpu->unplug && !cpu_can_run(cpu)) {
1395 qemu_tcg_destroy_vcpu(cpu);
1396 cpu->created = false;
1397 qemu_cond_signal(&qemu_cpu_cond);
1398 break;
1403 /* Single-threaded TCG
1405 * In the single-threaded case each vCPU is simulated in turn. If
1406 * there is more than a single vCPU we create a simple timer to kick
1407 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1408 * This is done explicitly rather than relying on side-effects
1409 * elsewhere.
1412 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1414 CPUState *cpu = arg;
1416 assert(tcg_enabled());
1417 rcu_register_thread();
1418 tcg_register_thread();
1420 qemu_mutex_lock_iothread();
1421 qemu_thread_get_self(cpu->thread);
1423 cpu->thread_id = qemu_get_thread_id();
1424 cpu->created = true;
1425 cpu->can_do_io = 1;
1426 qemu_cond_signal(&qemu_cpu_cond);
1428 /* wait for initial kick-off after machine start */
1429 while (first_cpu->stopped) {
1430 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1432 /* process any pending work */
1433 CPU_FOREACH(cpu) {
1434 current_cpu = cpu;
1435 qemu_wait_io_event_common(cpu);
1439 start_tcg_kick_timer();
1441 cpu = first_cpu;
1443 /* process any pending work */
1444 cpu->exit_request = 1;
1446 while (1) {
1447 qemu_mutex_unlock_iothread();
1448 replay_mutex_lock();
1449 qemu_mutex_lock_iothread();
1450 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1451 qemu_account_warp_timer();
1453 /* Run the timers here. This is much more efficient than
1454 * waking up the I/O thread and waiting for completion.
1456 handle_icount_deadline();
1458 replay_mutex_unlock();
1460 if (!cpu) {
1461 cpu = first_cpu;
1464 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1466 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1467 current_cpu = cpu;
1469 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1470 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1472 if (cpu_can_run(cpu)) {
1473 int r;
1475 qemu_mutex_unlock_iothread();
1476 prepare_icount_for_run(cpu);
1478 r = tcg_cpu_exec(cpu);
1480 process_icount_data(cpu);
1481 qemu_mutex_lock_iothread();
1483 if (r == EXCP_DEBUG) {
1484 cpu_handle_guest_debug(cpu);
1485 break;
1486 } else if (r == EXCP_ATOMIC) {
1487 qemu_mutex_unlock_iothread();
1488 cpu_exec_step_atomic(cpu);
1489 qemu_mutex_lock_iothread();
1490 break;
1492 } else if (cpu->stop) {
1493 if (cpu->unplug) {
1494 cpu = CPU_NEXT(cpu);
1496 break;
1499 cpu = CPU_NEXT(cpu);
1500 } /* while (cpu && !cpu->exit_request).. */
1502 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1503 atomic_set(&tcg_current_rr_cpu, NULL);
1505 if (cpu && cpu->exit_request) {
1506 atomic_mb_set(&cpu->exit_request, 0);
1509 qemu_tcg_rr_wait_io_event(cpu ? cpu : first_cpu);
1510 deal_with_unplugged_cpus();
1513 rcu_unregister_thread();
1514 return NULL;
1517 static void *qemu_hax_cpu_thread_fn(void *arg)
1519 CPUState *cpu = arg;
1520 int r;
1522 rcu_register_thread();
1523 qemu_mutex_lock_iothread();
1524 qemu_thread_get_self(cpu->thread);
1526 cpu->thread_id = qemu_get_thread_id();
1527 cpu->created = true;
1528 cpu->halted = 0;
1529 current_cpu = cpu;
1531 hax_init_vcpu(cpu);
1532 qemu_cond_signal(&qemu_cpu_cond);
1534 do {
1535 if (cpu_can_run(cpu)) {
1536 r = hax_smp_cpu_exec(cpu);
1537 if (r == EXCP_DEBUG) {
1538 cpu_handle_guest_debug(cpu);
1542 qemu_wait_io_event(cpu);
1543 } while (!cpu->unplug || cpu_can_run(cpu));
1544 rcu_unregister_thread();
1545 return NULL;
1548 /* The HVF-specific vCPU thread function. This one should only run when the host
1549 * CPU supports the VMX "unrestricted guest" feature. */
1550 static void *qemu_hvf_cpu_thread_fn(void *arg)
1552 CPUState *cpu = arg;
1554 int r;
1556 assert(hvf_enabled());
1558 rcu_register_thread();
1560 qemu_mutex_lock_iothread();
1561 qemu_thread_get_self(cpu->thread);
1563 cpu->thread_id = qemu_get_thread_id();
1564 cpu->can_do_io = 1;
1565 current_cpu = cpu;
1567 hvf_init_vcpu(cpu);
1569 /* signal CPU creation */
1570 cpu->created = true;
1571 qemu_cond_signal(&qemu_cpu_cond);
1573 do {
1574 if (cpu_can_run(cpu)) {
1575 r = hvf_vcpu_exec(cpu);
1576 if (r == EXCP_DEBUG) {
1577 cpu_handle_guest_debug(cpu);
1580 qemu_wait_io_event(cpu);
1581 } while (!cpu->unplug || cpu_can_run(cpu));
1583 hvf_vcpu_destroy(cpu);
1584 cpu->created = false;
1585 qemu_cond_signal(&qemu_cpu_cond);
1586 qemu_mutex_unlock_iothread();
1587 rcu_unregister_thread();
1588 return NULL;
1591 static void *qemu_whpx_cpu_thread_fn(void *arg)
1593 CPUState *cpu = arg;
1594 int r;
1596 rcu_register_thread();
1598 qemu_mutex_lock_iothread();
1599 qemu_thread_get_self(cpu->thread);
1600 cpu->thread_id = qemu_get_thread_id();
1601 current_cpu = cpu;
1603 r = whpx_init_vcpu(cpu);
1604 if (r < 0) {
1605 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1606 exit(1);
1609 /* signal CPU creation */
1610 cpu->created = true;
1611 qemu_cond_signal(&qemu_cpu_cond);
1613 do {
1614 if (cpu_can_run(cpu)) {
1615 r = whpx_vcpu_exec(cpu);
1616 if (r == EXCP_DEBUG) {
1617 cpu_handle_guest_debug(cpu);
1620 while (cpu_thread_is_idle(cpu)) {
1621 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1623 qemu_wait_io_event_common(cpu);
1624 } while (!cpu->unplug || cpu_can_run(cpu));
1626 whpx_destroy_vcpu(cpu);
1627 cpu->created = false;
1628 qemu_cond_signal(&qemu_cpu_cond);
1629 qemu_mutex_unlock_iothread();
1630 rcu_unregister_thread();
1631 return NULL;
1634 #ifdef _WIN32
1635 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1638 #endif
1640 /* Multi-threaded TCG
1642 * In the multi-threaded case each vCPU has its own thread. The TLS
1643 * variable current_cpu can be used deep in the code to find the
1644 * current CPUState for a given thread.
1647 static void *qemu_tcg_cpu_thread_fn(void *arg)
1649 CPUState *cpu = arg;
1651 assert(tcg_enabled());
1652 g_assert(!use_icount);
1654 rcu_register_thread();
1655 tcg_register_thread();
1657 qemu_mutex_lock_iothread();
1658 qemu_thread_get_self(cpu->thread);
1660 cpu->thread_id = qemu_get_thread_id();
1661 cpu->created = true;
1662 cpu->can_do_io = 1;
1663 current_cpu = cpu;
1664 qemu_cond_signal(&qemu_cpu_cond);
1666 /* process any pending work */
1667 cpu->exit_request = 1;
1669 do {
1670 if (cpu_can_run(cpu)) {
1671 int r;
1672 qemu_mutex_unlock_iothread();
1673 r = tcg_cpu_exec(cpu);
1674 qemu_mutex_lock_iothread();
1675 switch (r) {
1676 case EXCP_DEBUG:
1677 cpu_handle_guest_debug(cpu);
1678 break;
1679 case EXCP_HALTED:
1680 /* during start-up the vCPU is reset and the thread is
1681 * kicked several times. If we don't ensure we go back
1682 * to sleep in the halted state we won't cleanly
1683 * start up when the vCPU is enabled.
1685 * cpu->halted should ensure we sleep in wait_io_event
1687 g_assert(cpu->halted);
1688 break;
1689 case EXCP_ATOMIC:
1690 qemu_mutex_unlock_iothread();
1691 cpu_exec_step_atomic(cpu);
1692 qemu_mutex_lock_iothread();
1693 default:
1694 /* Ignore everything else? */
1695 break;
1699 atomic_mb_set(&cpu->exit_request, 0);
1700 qemu_wait_io_event(cpu);
1701 } while (!cpu->unplug || cpu_can_run(cpu));
1703 qemu_tcg_destroy_vcpu(cpu);
1704 cpu->created = false;
1705 qemu_cond_signal(&qemu_cpu_cond);
1706 qemu_mutex_unlock_iothread();
1707 rcu_unregister_thread();
1708 return NULL;
1711 static void qemu_cpu_kick_thread(CPUState *cpu)
1713 #ifndef _WIN32
1714 int err;
1716 if (cpu->thread_kicked) {
1717 return;
1719 cpu->thread_kicked = true;
1720 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1721 if (err) {
1722 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1723 exit(1);
1725 #else /* _WIN32 */
1726 if (!qemu_cpu_is_self(cpu)) {
1727 if (whpx_enabled()) {
1728 whpx_vcpu_kick(cpu);
1729 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1730 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1731 __func__, GetLastError());
1732 exit(1);
1735 #endif
1738 void qemu_cpu_kick(CPUState *cpu)
1740 qemu_cond_broadcast(cpu->halt_cond);
1741 if (tcg_enabled()) {
1742 cpu_exit(cpu);
1743 /* NOP unless doing single-thread RR */
1744 qemu_cpu_kick_rr_cpu();
1745 } else {
1746 if (hax_enabled()) {
1748 * FIXME: race condition with the exit_request check in
1749 * hax_vcpu_hax_exec
1751 cpu->exit_request = 1;
1753 qemu_cpu_kick_thread(cpu);
1757 void qemu_cpu_kick_self(void)
1759 assert(current_cpu);
1760 qemu_cpu_kick_thread(current_cpu);
1763 bool qemu_cpu_is_self(CPUState *cpu)
1765 return qemu_thread_is_self(cpu->thread);
1768 bool qemu_in_vcpu_thread(void)
1770 return current_cpu && qemu_cpu_is_self(current_cpu);
1773 static __thread bool iothread_locked = false;
1775 bool qemu_mutex_iothread_locked(void)
1777 return iothread_locked;
1781 * The BQL is taken from so many places that it is worth profiling the
1782 * callers directly, instead of funneling them all through a single function.
1784 void qemu_mutex_lock_iothread_impl(const char *file, int line)
1786 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1788 g_assert(!qemu_mutex_iothread_locked());
1789 bql_lock(&qemu_global_mutex, file, line);
1790 iothread_locked = true;
1793 void qemu_mutex_unlock_iothread(void)
1795 g_assert(qemu_mutex_iothread_locked());
1796 iothread_locked = false;
1797 qemu_mutex_unlock(&qemu_global_mutex);
1800 static bool all_vcpus_paused(void)
1802 CPUState *cpu;
1804 CPU_FOREACH(cpu) {
1805 if (!cpu->stopped) {
1806 return false;
1810 return true;
1813 void pause_all_vcpus(void)
1815 CPUState *cpu;
1817 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1818 CPU_FOREACH(cpu) {
1819 if (qemu_cpu_is_self(cpu)) {
1820 qemu_cpu_stop(cpu, true);
1821 } else {
1822 cpu->stop = true;
1823 qemu_cpu_kick(cpu);
1827 /* We need to drop the replay_lock so any vCPU threads woken up
1828 * can finish their replay tasks
1830 replay_mutex_unlock();
1832 while (!all_vcpus_paused()) {
1833 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1834 CPU_FOREACH(cpu) {
1835 qemu_cpu_kick(cpu);
1839 qemu_mutex_unlock_iothread();
1840 replay_mutex_lock();
1841 qemu_mutex_lock_iothread();
1844 void cpu_resume(CPUState *cpu)
1846 cpu->stop = false;
1847 cpu->stopped = false;
1848 qemu_cpu_kick(cpu);
1851 void resume_all_vcpus(void)
1853 CPUState *cpu;
1855 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1856 CPU_FOREACH(cpu) {
1857 cpu_resume(cpu);
1861 void cpu_remove_sync(CPUState *cpu)
1863 cpu->stop = true;
1864 cpu->unplug = true;
1865 qemu_cpu_kick(cpu);
1866 qemu_mutex_unlock_iothread();
1867 qemu_thread_join(cpu->thread);
1868 qemu_mutex_lock_iothread();
1871 /* For temporary buffers for forming a name */
1872 #define VCPU_THREAD_NAME_SIZE 16
1874 static void qemu_tcg_init_vcpu(CPUState *cpu)
1876 char thread_name[VCPU_THREAD_NAME_SIZE];
1877 static QemuCond *single_tcg_halt_cond;
1878 static QemuThread *single_tcg_cpu_thread;
1879 static int tcg_region_inited;
1881 assert(tcg_enabled());
1883 * Initialize TCG regions--once. Now is a good time, because:
1884 * (1) TCG's init context, prologue and target globals have been set up.
1885 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1886 * -accel flag is processed, so the check doesn't work then).
1888 if (!tcg_region_inited) {
1889 tcg_region_inited = 1;
1890 tcg_region_init();
1893 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1894 cpu->thread = g_malloc0(sizeof(QemuThread));
1895 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1896 qemu_cond_init(cpu->halt_cond);
1898 if (qemu_tcg_mttcg_enabled()) {
1899 /* create a thread per vCPU with TCG (MTTCG) */
1900 parallel_cpus = true;
1901 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1902 cpu->cpu_index);
1904 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1905 cpu, QEMU_THREAD_JOINABLE);
1907 } else {
1908 /* share a single thread for all cpus with TCG */
1909 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1910 qemu_thread_create(cpu->thread, thread_name,
1911 qemu_tcg_rr_cpu_thread_fn,
1912 cpu, QEMU_THREAD_JOINABLE);
1914 single_tcg_halt_cond = cpu->halt_cond;
1915 single_tcg_cpu_thread = cpu->thread;
1917 #ifdef _WIN32
1918 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1919 #endif
1920 } else {
1921 /* For non-MTTCG cases we share the thread */
1922 cpu->thread = single_tcg_cpu_thread;
1923 cpu->halt_cond = single_tcg_halt_cond;
1924 cpu->thread_id = first_cpu->thread_id;
1925 cpu->can_do_io = 1;
1926 cpu->created = true;
1930 static void qemu_hax_start_vcpu(CPUState *cpu)
1932 char thread_name[VCPU_THREAD_NAME_SIZE];
1934 cpu->thread = g_malloc0(sizeof(QemuThread));
1935 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1936 qemu_cond_init(cpu->halt_cond);
1938 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1939 cpu->cpu_index);
1940 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1941 cpu, QEMU_THREAD_JOINABLE);
1942 #ifdef _WIN32
1943 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1944 #endif
1947 static void qemu_kvm_start_vcpu(CPUState *cpu)
1949 char thread_name[VCPU_THREAD_NAME_SIZE];
1951 cpu->thread = g_malloc0(sizeof(QemuThread));
1952 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1953 qemu_cond_init(cpu->halt_cond);
1954 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1955 cpu->cpu_index);
1956 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1957 cpu, QEMU_THREAD_JOINABLE);
1960 static void qemu_hvf_start_vcpu(CPUState *cpu)
1962 char thread_name[VCPU_THREAD_NAME_SIZE];
1964 /* HVF currently does not support TCG, and only runs in
1965 * unrestricted-guest mode. */
1966 assert(hvf_enabled());
1968 cpu->thread = g_malloc0(sizeof(QemuThread));
1969 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1970 qemu_cond_init(cpu->halt_cond);
1972 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
1973 cpu->cpu_index);
1974 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
1975 cpu, QEMU_THREAD_JOINABLE);
1978 static void qemu_whpx_start_vcpu(CPUState *cpu)
1980 char thread_name[VCPU_THREAD_NAME_SIZE];
1982 cpu->thread = g_malloc0(sizeof(QemuThread));
1983 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1984 qemu_cond_init(cpu->halt_cond);
1985 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
1986 cpu->cpu_index);
1987 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
1988 cpu, QEMU_THREAD_JOINABLE);
1989 #ifdef _WIN32
1990 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1991 #endif
1994 static void qemu_dummy_start_vcpu(CPUState *cpu)
1996 char thread_name[VCPU_THREAD_NAME_SIZE];
1998 cpu->thread = g_malloc0(sizeof(QemuThread));
1999 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2000 qemu_cond_init(cpu->halt_cond);
2001 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2002 cpu->cpu_index);
2003 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2004 QEMU_THREAD_JOINABLE);
2007 void qemu_init_vcpu(CPUState *cpu)
2009 cpu->nr_cores = smp_cores;
2010 cpu->nr_threads = smp_threads;
2011 cpu->stopped = true;
2013 if (!cpu->as) {
2014 /* If the target cpu hasn't set up any address spaces itself,
2015 * give it the default one.
2017 cpu->num_ases = 1;
2018 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2021 if (kvm_enabled()) {
2022 qemu_kvm_start_vcpu(cpu);
2023 } else if (hax_enabled()) {
2024 qemu_hax_start_vcpu(cpu);
2025 } else if (hvf_enabled()) {
2026 qemu_hvf_start_vcpu(cpu);
2027 } else if (tcg_enabled()) {
2028 qemu_tcg_init_vcpu(cpu);
2029 } else if (whpx_enabled()) {
2030 qemu_whpx_start_vcpu(cpu);
2031 } else {
2032 qemu_dummy_start_vcpu(cpu);
2035 while (!cpu->created) {
2036 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2040 void cpu_stop_current(void)
2042 if (current_cpu) {
2043 qemu_cpu_stop(current_cpu, true);
2047 int vm_stop(RunState state)
2049 if (qemu_in_vcpu_thread()) {
2050 qemu_system_vmstop_request_prepare();
2051 qemu_system_vmstop_request(state);
2053 * FIXME: should not return to device code in case
2054 * vm_stop() has been requested.
2056 cpu_stop_current();
2057 return 0;
2060 return do_vm_stop(state, true);
2064 * Prepare for (re)starting the VM.
2065 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2066 * running or in case of an error condition), 0 otherwise.
2068 int vm_prepare_start(void)
2070 RunState requested;
2072 qemu_vmstop_requested(&requested);
2073 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2074 return -1;
2077 /* Ensure that a STOP/RESUME pair of events is emitted if a
2078 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2079 * example, is documented as always being followed by the
2080 * STOP event.
2082 if (runstate_is_running()) {
2083 qapi_event_send_stop(&error_abort);
2084 qapi_event_send_resume(&error_abort);
2085 return -1;
2088 /* We are sending this now, but the CPUs will be resumed shortly afterwards */
2089 qapi_event_send_resume(&error_abort);
2091 replay_enable_events();
2092 cpu_enable_ticks();
2093 runstate_set(RUN_STATE_RUNNING);
2094 vm_state_notify(1, RUN_STATE_RUNNING);
2095 return 0;
2098 void vm_start(void)
2100 if (!vm_prepare_start()) {
2101 resume_all_vcpus();
2105 /* Does a state transition even if the VM is already stopped;
2106 the current state is forgotten forever. */
2107 int vm_stop_force_state(RunState state)
2109 if (runstate_is_running()) {
2110 return vm_stop(state);
2111 } else {
2112 runstate_set(state);
2114 bdrv_drain_all();
2115 /* Make sure to return an error if the flush in a previous vm_stop()
2116 * failed. */
2117 return bdrv_flush_all();
2121 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
2123 /* XXX: implement xxx_cpu_list for targets that still miss it */
2124 #if defined(cpu_list)
2125 cpu_list(f, cpu_fprintf);
2126 #endif
2129 CpuInfoList *qmp_query_cpus(Error **errp)
2131 MachineState *ms = MACHINE(qdev_get_machine());
2132 MachineClass *mc = MACHINE_GET_CLASS(ms);
2133 CpuInfoList *head = NULL, *cur_item = NULL;
2134 CPUState *cpu;
2136 CPU_FOREACH(cpu) {
2137 CpuInfoList *info;
2138 #if defined(TARGET_I386)
2139 X86CPU *x86_cpu = X86_CPU(cpu);
2140 CPUX86State *env = &x86_cpu->env;
2141 #elif defined(TARGET_PPC)
2142 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
2143 CPUPPCState *env = &ppc_cpu->env;
2144 #elif defined(TARGET_SPARC)
2145 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
2146 CPUSPARCState *env = &sparc_cpu->env;
2147 #elif defined(TARGET_RISCV)
2148 RISCVCPU *riscv_cpu = RISCV_CPU(cpu);
2149 CPURISCVState *env = &riscv_cpu->env;
2150 #elif defined(TARGET_MIPS)
2151 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
2152 CPUMIPSState *env = &mips_cpu->env;
2153 #elif defined(TARGET_TRICORE)
2154 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
2155 CPUTriCoreState *env = &tricore_cpu->env;
2156 #elif defined(TARGET_S390X)
2157 S390CPU *s390_cpu = S390_CPU(cpu);
2158 CPUS390XState *env = &s390_cpu->env;
2159 #endif
2161 cpu_synchronize_state(cpu);
2163 info = g_malloc0(sizeof(*info));
2164 info->value = g_malloc0(sizeof(*info->value));
2165 info->value->CPU = cpu->cpu_index;
2166 info->value->current = (cpu == first_cpu);
2167 info->value->halted = cpu->halted;
2168 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2169 info->value->thread_id = cpu->thread_id;
2170 #if defined(TARGET_I386)
2171 info->value->arch = CPU_INFO_ARCH_X86;
2172 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
2173 #elif defined(TARGET_PPC)
2174 info->value->arch = CPU_INFO_ARCH_PPC;
2175 info->value->u.ppc.nip = env->nip;
2176 #elif defined(TARGET_SPARC)
2177 info->value->arch = CPU_INFO_ARCH_SPARC;
2178 info->value->u.q_sparc.pc = env->pc;
2179 info->value->u.q_sparc.npc = env->npc;
2180 #elif defined(TARGET_MIPS)
2181 info->value->arch = CPU_INFO_ARCH_MIPS;
2182 info->value->u.q_mips.PC = env->active_tc.PC;
2183 #elif defined(TARGET_TRICORE)
2184 info->value->arch = CPU_INFO_ARCH_TRICORE;
2185 info->value->u.tricore.PC = env->PC;
2186 #elif defined(TARGET_S390X)
2187 info->value->arch = CPU_INFO_ARCH_S390;
2188 info->value->u.s390.cpu_state = env->cpu_state;
2189 #elif defined(TARGET_RISCV)
2190 info->value->arch = CPU_INFO_ARCH_RISCV;
2191 info->value->u.riscv.pc = env->pc;
2192 #else
2193 info->value->arch = CPU_INFO_ARCH_OTHER;
2194 #endif
2195 info->value->has_props = !!mc->cpu_index_to_instance_props;
2196 if (info->value->has_props) {
2197 CpuInstanceProperties *props;
2198 props = g_malloc0(sizeof(*props));
2199 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2200 info->value->props = props;
2203 /* XXX: waiting for the qapi to support GSList */
2204 if (!cur_item) {
2205 head = cur_item = info;
2206 } else {
2207 cur_item->next = info;
2208 cur_item = info;
2212 return head;
2215 static CpuInfoArch sysemu_target_to_cpuinfo_arch(SysEmuTarget target)
2218 * The @SysEmuTarget -> @CpuInfoArch mapping below is based on the
2219 * TARGET_ARCH -> TARGET_BASE_ARCH mapping in the "configure" script.
2221 switch (target) {
2222 case SYS_EMU_TARGET_I386:
2223 case SYS_EMU_TARGET_X86_64:
2224 return CPU_INFO_ARCH_X86;
2226 case SYS_EMU_TARGET_PPC:
2227 case SYS_EMU_TARGET_PPCEMB:
2228 case SYS_EMU_TARGET_PPC64:
2229 return CPU_INFO_ARCH_PPC;
2231 case SYS_EMU_TARGET_SPARC:
2232 case SYS_EMU_TARGET_SPARC64:
2233 return CPU_INFO_ARCH_SPARC;
2235 case SYS_EMU_TARGET_MIPS:
2236 case SYS_EMU_TARGET_MIPSEL:
2237 case SYS_EMU_TARGET_MIPS64:
2238 case SYS_EMU_TARGET_MIPS64EL:
2239 return CPU_INFO_ARCH_MIPS;
2241 case SYS_EMU_TARGET_TRICORE:
2242 return CPU_INFO_ARCH_TRICORE;
2244 case SYS_EMU_TARGET_S390X:
2245 return CPU_INFO_ARCH_S390;
2247 case SYS_EMU_TARGET_RISCV32:
2248 case SYS_EMU_TARGET_RISCV64:
2249 return CPU_INFO_ARCH_RISCV;
2251 default:
2252 return CPU_INFO_ARCH_OTHER;
2256 static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu)
2258 #ifdef TARGET_S390X
2259 S390CPU *s390_cpu = S390_CPU(cpu);
2260 CPUS390XState *env = &s390_cpu->env;
2262 info->cpu_state = env->cpu_state;
2263 #else
2264 abort();
2265 #endif
2269 * fast means: we NEVER interrupt vCPU threads to retrieve
2270 * information from KVM.
2272 CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
2274 MachineState *ms = MACHINE(qdev_get_machine());
2275 MachineClass *mc = MACHINE_GET_CLASS(ms);
2276 CpuInfoFastList *head = NULL, *cur_item = NULL;
2277 SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, TARGET_NAME,
2278 -1, &error_abort);
2279 CPUState *cpu;
2281 CPU_FOREACH(cpu) {
2282 CpuInfoFastList *info = g_malloc0(sizeof(*info));
2283 info->value = g_malloc0(sizeof(*info->value));
2285 info->value->cpu_index = cpu->cpu_index;
2286 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2287 info->value->thread_id = cpu->thread_id;
2289 info->value->has_props = !!mc->cpu_index_to_instance_props;
2290 if (info->value->has_props) {
2291 CpuInstanceProperties *props;
2292 props = g_malloc0(sizeof(*props));
2293 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2294 info->value->props = props;
2297 info->value->arch = sysemu_target_to_cpuinfo_arch(target);
2298 info->value->target = target;
2299 if (target == SYS_EMU_TARGET_S390X) {
2300 cpustate_to_cpuinfo_s390(&info->value->u.s390x, cpu);
2303 if (!cur_item) {
2304 head = cur_item = info;
2305 } else {
2306 cur_item->next = info;
2307 cur_item = info;
2311 return head;
2314 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2315 bool has_cpu, int64_t cpu_index, Error **errp)
2317 FILE *f;
2318 uint32_t l;
2319 CPUState *cpu;
2320 uint8_t buf[1024];
2321 int64_t orig_addr = addr, orig_size = size;
2323 if (!has_cpu) {
2324 cpu_index = 0;
2327 cpu = qemu_get_cpu(cpu_index);
2328 if (cpu == NULL) {
2329 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2330 "a CPU number");
2331 return;
2334 f = fopen(filename, "wb");
2335 if (!f) {
2336 error_setg_file_open(errp, errno, filename);
2337 return;
2340 while (size != 0) {
2341 l = sizeof(buf);
2342 if (l > size)
2343 l = size;
2344 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2345 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2346 " specified", orig_addr, orig_size);
2347 goto exit;
2349 if (fwrite(buf, 1, l, f) != l) {
2350 error_setg(errp, QERR_IO_ERROR);
2351 goto exit;
2353 addr += l;
2354 size -= l;
2357 exit:
2358 fclose(f);
2361 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2362 Error **errp)
2364 FILE *f;
2365 uint32_t l;
2366 uint8_t buf[1024];
2368 f = fopen(filename, "wb");
2369 if (!f) {
2370 error_setg_file_open(errp, errno, filename);
2371 return;
2374 while (size != 0) {
2375 l = sizeof(buf);
2376 if (l > size)
2377 l = size;
2378 cpu_physical_memory_read(addr, buf, l);
2379 if (fwrite(buf, 1, l, f) != l) {
2380 error_setg(errp, QERR_IO_ERROR);
2381 goto exit;
2383 addr += l;
2384 size -= l;
2387 exit:
2388 fclose(f);
2391 void qmp_inject_nmi(Error **errp)
2393 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2396 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2398 if (!use_icount) {
2399 return;
2402 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
2403 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2404 if (icount_align_option) {
2405 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
2406 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
2407 } else {
2408 cpu_fprintf(f, "Max guest delay NA\n");
2409 cpu_fprintf(f, "Max guest advance NA\n");