[qemu/ar7.git] / cpus.c
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "qemu/osdep.h"
26 #include "qemu/config-file.h"
27 #include "cpu.h"
28 #include "monitor/monitor.h"
29 #include "qapi/error.h"
30 #include "qapi/qapi-commands-misc.h"
31 #include "qapi/qapi-events-run-state.h"
32 #include "qapi/qmp/qerror.h"
33 #include "qemu/error-report.h"
34 #include "qemu/qemu-print.h"
35 #include "sysemu/sysemu.h"
36 #include "sysemu/block-backend.h"
37 #include "exec/gdbstub.h"
38 #include "sysemu/dma.h"
39 #include "sysemu/hw_accel.h"
40 #include "sysemu/kvm.h"
41 #include "sysemu/hax.h"
42 #include "sysemu/hvf.h"
43 #include "sysemu/whpx.h"
44 #include "exec/exec-all.h"
46 #include "qemu/thread.h"
47 #include "sysemu/cpus.h"
48 #include "sysemu/qtest.h"
49 #include "qemu/main-loop.h"
50 #include "qemu/option.h"
51 #include "qemu/bitmap.h"
52 #include "qemu/seqlock.h"
53 #include "qemu/guest-random.h"
54 #include "tcg.h"
55 #include "hw/nmi.h"
56 #include "sysemu/replay.h"
57 #include "hw/boards.h"
59 #ifdef CONFIG_LINUX
61 #include <sys/prctl.h>
63 #ifndef PR_MCE_KILL
64 #define PR_MCE_KILL 33
65 #endif
67 #ifndef PR_MCE_KILL_SET
68 #define PR_MCE_KILL_SET 1
69 #endif
71 #ifndef PR_MCE_KILL_EARLY
72 #define PR_MCE_KILL_EARLY 1
73 #endif
75 #endif /* CONFIG_LINUX */
77 int64_t max_delay;
78 int64_t max_advance;
80 /* vcpu throttling controls */
81 static QEMUTimer *throttle_timer;
82 static unsigned int throttle_percentage;
84 #define CPU_THROTTLE_PCT_MIN 1
85 #define CPU_THROTTLE_PCT_MAX 99
86 #define CPU_THROTTLE_TIMESLICE_NS 10000000
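/* i.e. each throttling timeslice is 10 ms long */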
88 bool cpu_is_stopped(CPUState *cpu)
90 return cpu->stopped || !runstate_is_running();
93 static bool cpu_thread_is_idle(CPUState *cpu)
95 if (cpu->stop || cpu->queued_work_first) {
96 return false;
98 if (cpu_is_stopped(cpu)) {
99 return true;
101 if (!cpu->halted || cpu_has_work(cpu) ||
102 kvm_halt_in_kernel()) {
103 return false;
105 return true;
108 static bool all_cpu_threads_idle(void)
110 CPUState *cpu;
112 CPU_FOREACH(cpu) {
113 if (!cpu_thread_is_idle(cpu)) {
114 return false;
117 return true;
120 /***********************************************************/
121 /* guest cycle counter */
123 /* Protected by TimersState seqlock */
125 static bool icount_sleep = true;
126 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
127 #define MAX_ICOUNT_SHIFT 10
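/* icount_time_shift converts instructions to nanoseconds with a left shift
 * (ns = insns << shift).  A shift of 10 means 1024 ns per instruction,
 * i.e. roughly 1 MIPS; the auto-mode starting shift of 3 (8 ns per
 * instruction) corresponds to about 125 MIPS. */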
129 typedef struct TimersState {
130 /* Protected by BQL. */
131 int64_t cpu_ticks_prev;
132 int64_t cpu_ticks_offset;
134 /* Protect fields that can be respectively read outside the
135 * BQL, and written from multiple threads.
137 QemuSeqLock vm_clock_seqlock;
138 QemuSpin vm_clock_lock;
140 int16_t cpu_ticks_enabled;
142 /* Conversion factor from emulated instructions to virtual clock ticks. */
143 int16_t icount_time_shift;
145 /* Compensate for varying guest execution speed. */
146 int64_t qemu_icount_bias;
148 int64_t vm_clock_warp_start;
149 int64_t cpu_clock_offset;
151 /* Only written by TCG thread */
152 int64_t qemu_icount;
154 /* for adjusting icount */
155 QEMUTimer *icount_rt_timer;
156 QEMUTimer *icount_vm_timer;
157 QEMUTimer *icount_warp_timer;
158 } TimersState;
160 static TimersState timers_state;
161 bool mttcg_enabled;
164 * We default to false if we know other options have been enabled
165 * which are currently incompatible with MTTCG. Otherwise when each
166 * guest (target) has been updated to support:
167 * - atomic instructions
168 * - memory ordering primitives (barriers)
169 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
171 * Once a guest architecture has been converted to the new primitives
172 * there are two remaining limitations to check.
174 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
175 * - The host must have a stronger memory order than the guest
177 * It may be possible in future to support strong guests on weak hosts
178 * but that will require tagging all load/stores in a guest with their
179 * implicit memory order requirements which would likely slow things
180 * down a lot.
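/* Returns true only when every memory-ordering guarantee the guest expects
 * (TCG_GUEST_DEFAULT_MO) is also provided by the TCG backend for the host
 * (TCG_TARGET_DEFAULT_MO), i.e. the guest's requirements are a subset of
 * what the host enforces. */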
183 static bool check_tcg_memory_orders_compatible(void)
185 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
186 return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
187 #else
188 return false;
189 #endif
192 static bool default_mttcg_enabled(void)
194 if (use_icount || TCG_OVERSIZED_GUEST) {
195 return false;
196 } else {
197 #ifdef TARGET_SUPPORTS_MTTCG
198 return check_tcg_memory_orders_compatible();
199 #else
200 return false;
201 #endif
205 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
207 const char *t = qemu_opt_get(opts, "thread");
208 if (t) {
209 if (strcmp(t, "multi") == 0) {
210 if (TCG_OVERSIZED_GUEST) {
211 error_setg(errp, "No MTTCG when guest word size > hosts");
212 } else if (use_icount) {
213 error_setg(errp, "No MTTCG when icount is enabled");
214 } else {
215 #ifndef TARGET_SUPPORTS_MTTCG
216 warn_report("Guest not yet converted to MTTCG - "
217 "you may get unexpected results");
218 #endif
219 if (!check_tcg_memory_orders_compatible()) {
220 warn_report("Guest expects a stronger memory ordering "
221 "than the host provides");
222 error_printf("This may cause strange/hard to debug errors\n");
224 mttcg_enabled = true;
226 } else if (strcmp(t, "single") == 0) {
227 mttcg_enabled = false;
228 } else {
229 error_setg(errp, "Invalid 'thread' setting %s", t);
231 } else {
232 mttcg_enabled = default_mttcg_enabled();
236 /* The current number of executed instructions is based on what we
237 * originally budgeted minus the current state of the decrementing
238 * icount counters in extra/u16.low.
240 static int64_t cpu_get_icount_executed(CPUState *cpu)
242 return (cpu->icount_budget -
243 (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
247 * Update the global shared timer_state.qemu_icount to take into
248 * account executed instructions. This is done by the TCG vCPU
249 * thread so the main-loop can see time has moved forward.
251 static void cpu_update_icount_locked(CPUState *cpu)
253 int64_t executed = cpu_get_icount_executed(cpu);
254 cpu->icount_budget -= executed;
256 atomic_set_i64(&timers_state.qemu_icount,
257 timers_state.qemu_icount + executed);
261 * Update the global shared timer_state.qemu_icount to take into
262 * account executed instructions. This is done by the TCG vCPU
263 * thread so the main-loop can see time has moved forward.
265 void cpu_update_icount(CPUState *cpu)
267 seqlock_write_lock(&timers_state.vm_clock_seqlock,
268 &timers_state.vm_clock_lock);
269 cpu_update_icount_locked(cpu);
270 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
271 &timers_state.vm_clock_lock);
274 static int64_t cpu_get_icount_raw_locked(void)
276 CPUState *cpu = current_cpu;
278 if (cpu && cpu->running) {
279 if (!cpu->can_do_io) {
280 error_report("Bad icount read");
281 exit(1);
283 /* Take into account what has run */
284 cpu_update_icount_locked(cpu);
286 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
287 return atomic_read_i64(&timers_state.qemu_icount);
290 static int64_t cpu_get_icount_locked(void)
292 int64_t icount = cpu_get_icount_raw_locked();
293 return atomic_read_i64(&timers_state.qemu_icount_bias) +
294 cpu_icount_to_ns(icount);
297 int64_t cpu_get_icount_raw(void)
299 int64_t icount;
300 unsigned start;
302 do {
303 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
304 icount = cpu_get_icount_raw_locked();
305 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
307 return icount;
310 /* Return the virtual CPU time, based on the instruction counter. */
311 int64_t cpu_get_icount(void)
313 int64_t icount;
314 unsigned start;
316 do {
317 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
318 icount = cpu_get_icount_locked();
319 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
321 return icount;
324 int64_t cpu_icount_to_ns(int64_t icount)
326 return icount << atomic_read(&timers_state.icount_time_shift);
329 static int64_t cpu_get_ticks_locked(void)
331 int64_t ticks = timers_state.cpu_ticks_offset;
332 if (timers_state.cpu_ticks_enabled) {
333 ticks += cpu_get_host_ticks();
336 if (timers_state.cpu_ticks_prev > ticks) {
337 /* Non-increasing ticks may happen if the host uses software suspend. */
338 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
339 ticks = timers_state.cpu_ticks_prev;
342 timers_state.cpu_ticks_prev = ticks;
343 return ticks;
346 /* return the time elapsed in VM between vm_start and vm_stop. Unless
347 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
348 * counter.
350 int64_t cpu_get_ticks(void)
352 int64_t ticks;
354 if (use_icount) {
355 return cpu_get_icount();
358 qemu_spin_lock(&timers_state.vm_clock_lock);
359 ticks = cpu_get_ticks_locked();
360 qemu_spin_unlock(&timers_state.vm_clock_lock);
361 return ticks;
364 static int64_t cpu_get_clock_locked(void)
366 int64_t time;
368 time = timers_state.cpu_clock_offset;
369 if (timers_state.cpu_ticks_enabled) {
370 time += get_clock();
373 return time;
376 /* Return the monotonic time elapsed in VM, i.e.,
377 * the time between vm_start and vm_stop
379 int64_t cpu_get_clock(void)
381 int64_t ti;
382 unsigned start;
384 do {
385 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
386 ti = cpu_get_clock_locked();
387 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
389 return ti;
392 /* enable cpu_get_ticks()
393 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
395 void cpu_enable_ticks(void)
397 seqlock_write_lock(&timers_state.vm_clock_seqlock,
398 &timers_state.vm_clock_lock);
399 if (!timers_state.cpu_ticks_enabled) {
400 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
401 timers_state.cpu_clock_offset -= get_clock();
402 timers_state.cpu_ticks_enabled = 1;
404 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
405 &timers_state.vm_clock_lock);
408 /* disable cpu_get_ticks() : the clock is stopped. You must not call
409 * cpu_get_ticks() after that.
410 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
412 void cpu_disable_ticks(void)
414 seqlock_write_lock(&timers_state.vm_clock_seqlock,
415 &timers_state.vm_clock_lock);
416 if (timers_state.cpu_ticks_enabled) {
417 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
418 timers_state.cpu_clock_offset = cpu_get_clock_locked();
419 timers_state.cpu_ticks_enabled = 0;
421 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
422 &timers_state.vm_clock_lock);
425 /* Correlation between real and virtual time is always going to be
426 fairly approximate, so ignore small variation.
427 When the guest is idle, real and virtual time will be aligned in
428 the IO wait loop. */
429 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
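/* 100 ms: drift smaller than this is treated as noise by icount_adjust(). */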
431 static void icount_adjust(void)
433 int64_t cur_time;
434 int64_t cur_icount;
435 int64_t delta;
437 /* Protected by TimersState mutex. */
438 static int64_t last_delta;
440 /* If the VM is not running, then do nothing. */
441 if (!runstate_is_running()) {
442 return;
445 seqlock_write_lock(&timers_state.vm_clock_seqlock,
446 &timers_state.vm_clock_lock);
447 cur_time = cpu_get_clock_locked();
448 cur_icount = cpu_get_icount_locked();
450 delta = cur_icount - cur_time;
451 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
452 if (delta > 0
453 && last_delta + ICOUNT_WOBBLE < delta * 2
454 && timers_state.icount_time_shift > 0) {
455 /* The guest is getting too far ahead. Slow time down. */
456 atomic_set(&timers_state.icount_time_shift,
457 timers_state.icount_time_shift - 1);
459 if (delta < 0
460 && last_delta - ICOUNT_WOBBLE > delta * 2
461 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
462 /* The guest is getting too far behind. Speed time up. */
463 atomic_set(&timers_state.icount_time_shift,
464 timers_state.icount_time_shift + 1);
466 last_delta = delta;
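/* Recompute the bias so that bias + (qemu_icount << shift) still equals
 * cur_icount: changing icount_time_shift above must not make
 * QEMU_CLOCK_VIRTUAL jump. */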
467 atomic_set_i64(&timers_state.qemu_icount_bias,
468 cur_icount - (timers_state.qemu_icount
469 << timers_state.icount_time_shift));
470 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
471 &timers_state.vm_clock_lock);
474 static void icount_adjust_rt(void *opaque)
476 timer_mod(timers_state.icount_rt_timer,
477 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
478 icount_adjust();
481 static void icount_adjust_vm(void *opaque)
483 timer_mod(timers_state.icount_vm_timer,
484 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
485 NANOSECONDS_PER_SECOND / 10);
486 icount_adjust();
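/* Convert a QEMU_CLOCK_VIRTUAL deadline in nanoseconds into a whole number
 * of instructions, rounding up: e.g. with shift == 3, a 20 ns deadline
 * rounds up to 3 instructions. */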
489 static int64_t qemu_icount_round(int64_t count)
491 int shift = atomic_read(&timers_state.icount_time_shift);
492 return (count + (1 << shift) - 1) >> shift;
495 static void icount_warp_rt(void)
497 unsigned seq;
498 int64_t warp_start;
500 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
501 * changes from -1 to another value, so the race here is okay.
503 do {
504 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
505 warp_start = timers_state.vm_clock_warp_start;
506 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
508 if (warp_start == -1) {
509 return;
512 seqlock_write_lock(&timers_state.vm_clock_seqlock,
513 &timers_state.vm_clock_lock);
514 if (runstate_is_running()) {
515 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
516 cpu_get_clock_locked());
517 int64_t warp_delta;
519 warp_delta = clock - timers_state.vm_clock_warp_start;
520 if (use_icount == 2) {
522 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
523 * far ahead of real time.
525 int64_t cur_icount = cpu_get_icount_locked();
526 int64_t delta = clock - cur_icount;
527 warp_delta = MIN(warp_delta, delta);
529 atomic_set_i64(&timers_state.qemu_icount_bias,
530 timers_state.qemu_icount_bias + warp_delta);
532 timers_state.vm_clock_warp_start = -1;
533 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
534 &timers_state.vm_clock_lock);
536 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
537 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
541 static void icount_timer_cb(void *opaque)
543 /* No need for a checkpoint because the timer already synchronizes
544 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
546 icount_warp_rt();
549 void qtest_clock_warp(int64_t dest)
551 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
552 AioContext *aio_context;
553 assert(qtest_enabled());
554 aio_context = qemu_get_aio_context();
555 while (clock < dest) {
556 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
557 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
559 seqlock_write_lock(&timers_state.vm_clock_seqlock,
560 &timers_state.vm_clock_lock);
561 atomic_set_i64(&timers_state.qemu_icount_bias,
562 timers_state.qemu_icount_bias + warp);
563 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
564 &timers_state.vm_clock_lock);
566 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
567 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
568 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
570 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
573 void qemu_start_warp_timer(void)
575 int64_t clock;
576 int64_t deadline;
578 if (!use_icount) {
579 return;
582 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
583 * do not fire, so computing the deadline does not make sense.
585 if (!runstate_is_running()) {
586 return;
589 if (replay_mode != REPLAY_MODE_PLAY) {
590 if (!all_cpu_threads_idle()) {
591 return;
594 if (qtest_enabled()) {
595 /* When testing, qtest commands advance icount. */
596 return;
599 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
600 } else {
601 /* warp clock deterministically in record/replay mode */
602 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
603 /* The vCPU is sleeping and the warp can't be started.
604 It is probably a race condition: the notification sent
605 to the vCPU was processed in advance and the vCPU went to sleep.
606 Therefore we have to wake it up so it can do something. */
607 if (replay_has_checkpoint()) {
608 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
610 return;
614 /* We want to use the earliest deadline from ALL vm_clocks */
615 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
616 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
617 if (deadline < 0) {
618 static bool notified;
619 if (!icount_sleep && !notified) {
620 warn_report("icount sleep disabled and no active timers");
621 notified = true;
623 return;
626 if (deadline > 0) {
628 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
629 * sleep. Otherwise, the CPU might be waiting for a future timer
630 * interrupt to wake it up, but the interrupt never comes because
631 * the vCPU isn't running any insns and thus doesn't advance the
632 * QEMU_CLOCK_VIRTUAL.
634 if (!icount_sleep) {
636 * We never let VCPUs sleep in no-sleep icount mode.
637 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
638 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
639 * It is useful when we want a deterministic execution time,
640 * isolated from host latencies.
642 seqlock_write_lock(&timers_state.vm_clock_seqlock,
643 &timers_state.vm_clock_lock);
644 atomic_set_i64(&timers_state.qemu_icount_bias,
645 timers_state.qemu_icount_bias + deadline);
646 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
647 &timers_state.vm_clock_lock);
648 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
649 } else {
651 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
652 * "real" time (related to the time left until the next event) has
653 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
654 * This prevents the warps from being visible externally; for example,
655 * you will not send network packets continuously instead of
656 * every 100 ms.
658 seqlock_write_lock(&timers_state.vm_clock_seqlock,
659 &timers_state.vm_clock_lock);
660 if (timers_state.vm_clock_warp_start == -1
661 || timers_state.vm_clock_warp_start > clock) {
662 timers_state.vm_clock_warp_start = clock;
664 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
665 &timers_state.vm_clock_lock);
666 timer_mod_anticipate(timers_state.icount_warp_timer,
667 clock + deadline);
669 } else if (deadline == 0) {
670 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
674 static void qemu_account_warp_timer(void)
676 if (!use_icount || !icount_sleep) {
677 return;
680 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
681 * do not fire, so computing the deadline does not make sense.
683 if (!runstate_is_running()) {
684 return;
687 /* warp clock deterministically in record/replay mode */
688 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
689 return;
692 timer_del(timers_state.icount_warp_timer);
693 icount_warp_rt();
696 static bool icount_state_needed(void *opaque)
698 return use_icount;
701 static bool warp_timer_state_needed(void *opaque)
703 TimersState *s = opaque;
704 return s->icount_warp_timer != NULL;
707 static bool adjust_timers_state_needed(void *opaque)
709 TimersState *s = opaque;
710 return s->icount_rt_timer != NULL;
714 * The subsection for warp timer migration is optional, because the timer may not be created
716 static const VMStateDescription icount_vmstate_warp_timer = {
717 .name = "timer/icount/warp_timer",
718 .version_id = 1,
719 .minimum_version_id = 1,
720 .needed = warp_timer_state_needed,
721 .fields = (VMStateField[]) {
722 VMSTATE_INT64(vm_clock_warp_start, TimersState),
723 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
724 VMSTATE_END_OF_LIST()
728 static const VMStateDescription icount_vmstate_adjust_timers = {
729 .name = "timer/icount/timers",
730 .version_id = 1,
731 .minimum_version_id = 1,
732 .needed = adjust_timers_state_needed,
733 .fields = (VMStateField[]) {
734 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
735 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
736 VMSTATE_END_OF_LIST()
741 * This is a subsection for icount migration.
743 static const VMStateDescription icount_vmstate_timers = {
744 .name = "timer/icount",
745 .version_id = 1,
746 .minimum_version_id = 1,
747 .needed = icount_state_needed,
748 .fields = (VMStateField[]) {
749 VMSTATE_INT64(qemu_icount_bias, TimersState),
750 VMSTATE_INT64(qemu_icount, TimersState),
751 VMSTATE_END_OF_LIST()
753 .subsections = (const VMStateDescription*[]) {
754 &icount_vmstate_warp_timer,
755 &icount_vmstate_adjust_timers,
756 NULL
760 static const VMStateDescription vmstate_timers = {
761 .name = "timer",
762 .version_id = 2,
763 .minimum_version_id = 1,
764 .fields = (VMStateField[]) {
765 VMSTATE_INT64(cpu_ticks_offset, TimersState),
766 VMSTATE_UNUSED(8),
767 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
768 VMSTATE_END_OF_LIST()
770 .subsections = (const VMStateDescription*[]) {
771 &icount_vmstate_timers,
772 NULL
776 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
778 double pct;
779 double throttle_ratio;
780 long sleeptime_ns;
782 if (!cpu_throttle_get_percentage()) {
783 return;
786 pct = (double)cpu_throttle_get_percentage()/100;
787 throttle_ratio = pct / (1 - pct);
788 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
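/* For example, at the maximum of 99% the ratio is 99, so the vCPU sleeps
 * 99 * 10 ms = 990 ms out of every 1 s period scheduled by
 * cpu_throttle_timer_tick(). */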
790 qemu_mutex_unlock_iothread();
791 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
792 qemu_mutex_lock_iothread();
793 atomic_set(&cpu->throttle_thread_scheduled, 0);
796 static void cpu_throttle_timer_tick(void *opaque)
798 CPUState *cpu;
799 double pct;
801 /* Stop the timer if needed */
802 if (!cpu_throttle_get_percentage()) {
803 return;
805 CPU_FOREACH(cpu) {
806 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
807 async_run_on_cpu(cpu, cpu_throttle_thread,
808 RUN_ON_CPU_NULL);
812 pct = (double)cpu_throttle_get_percentage()/100;
813 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
814 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
817 void cpu_throttle_set(int new_throttle_pct)
819 /* Ensure throttle percentage is within valid range */
820 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
821 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
823 atomic_set(&throttle_percentage, new_throttle_pct);
825 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
826 CPU_THROTTLE_TIMESLICE_NS);
829 void cpu_throttle_stop(void)
831 atomic_set(&throttle_percentage, 0);
834 bool cpu_throttle_active(void)
836 return (cpu_throttle_get_percentage() != 0);
839 int cpu_throttle_get_percentage(void)
841 return atomic_read(&throttle_percentage);
844 void cpu_ticks_init(void)
846 seqlock_init(&timers_state.vm_clock_seqlock);
847 qemu_spin_init(&timers_state.vm_clock_lock);
848 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
849 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
850 cpu_throttle_timer_tick, NULL);
853 void configure_icount(QemuOpts *opts, Error **errp)
855 const char *option;
856 char *rem_str = NULL;
858 option = qemu_opt_get(opts, "shift");
859 if (!option) {
860 if (qemu_opt_get(opts, "align") != NULL) {
861 error_setg(errp, "Please specify shift option when using align");
863 return;
866 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
867 if (icount_sleep) {
868 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
869 icount_timer_cb, NULL);
872 icount_align_option = qemu_opt_get_bool(opts, "align", false);
874 if (icount_align_option && !icount_sleep) {
875 error_setg(errp, "align=on and sleep=off are incompatible");
877 if (strcmp(option, "auto") != 0) {
878 errno = 0;
879 timers_state.icount_time_shift = strtol(option, &rem_str, 0);
880 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
881 error_setg(errp, "icount: Invalid shift value");
883 use_icount = 1;
884 return;
885 } else if (icount_align_option) {
886 error_setg(errp, "shift=auto and align=on are incompatible");
887 } else if (!icount_sleep) {
888 error_setg(errp, "shift=auto and sleep=off are incompatible");
891 use_icount = 2;
893 /* 125MIPS seems a reasonable initial guess at the guest speed.
894 It will be corrected fairly quickly anyway. */
895 timers_state.icount_time_shift = 3;
897 /* Have both realtime and virtual time triggers for speed adjustment.
898 The realtime trigger catches emulated time passing too slowly,
899 the virtual time trigger catches emulated time passing too fast.
900 Realtime triggers occur even when idle, so use them less frequently
901 than VM triggers. */
902 timers_state.vm_clock_warp_start = -1;
903 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
904 icount_adjust_rt, NULL);
905 timer_mod(timers_state.icount_rt_timer,
906 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
907 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
908 icount_adjust_vm, NULL);
909 timer_mod(timers_state.icount_vm_timer,
910 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
911 NANOSECONDS_PER_SECOND / 10);
914 /***********************************************************/
915 /* TCG vCPU kick timer
917 * The kick timer is responsible for moving single-threaded vCPU
918 * emulation on to the next vCPU. If more than one vCPU is running, a
919 * timer event will force a cpu->exit so the next vCPU can get
920 * scheduled.
922 * The timer is removed when all vCPUs are idle and restarted again once
923 * idleness is over.
926 static QEMUTimer *tcg_kick_vcpu_timer;
927 static CPUState *tcg_current_rr_cpu;
929 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
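/* i.e. the current round-robin vCPU is kicked every 100 ms of
 * QEMU_CLOCK_VIRTUAL time. */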
931 static inline int64_t qemu_tcg_next_kick(void)
933 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
936 /* Kick the currently round-robin scheduled vCPU */
937 static void qemu_cpu_kick_rr_cpu(void)
939 CPUState *cpu;
940 do {
941 cpu = atomic_mb_read(&tcg_current_rr_cpu);
942 if (cpu) {
943 cpu_exit(cpu);
945 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
948 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
952 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
954 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
955 qemu_notify_event();
956 return;
959 if (qemu_in_vcpu_thread()) {
960 /* A CPU is currently running; kick it back out to the
961 * tcg_cpu_exec() loop so it will recalculate its
962 * icount deadline immediately.
964 qemu_cpu_kick(current_cpu);
965 } else if (first_cpu) {
966 /* qemu_cpu_kick is not enough to kick a halted CPU out of
967 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
968 * causes cpu_thread_is_idle to return false. This way,
969 * handle_icount_deadline can run.
970 * If we have no CPUs at all for some reason, we don't
971 * need to do anything.
973 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
977 static void kick_tcg_thread(void *opaque)
979 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
980 qemu_cpu_kick_rr_cpu();
983 static void start_tcg_kick_timer(void)
985 assert(!mttcg_enabled);
986 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
987 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
988 kick_tcg_thread, NULL);
990 if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
991 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
995 static void stop_tcg_kick_timer(void)
997 assert(!mttcg_enabled);
998 if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
999 timer_del(tcg_kick_vcpu_timer);
1003 /***********************************************************/
1004 void hw_error(const char *fmt, ...)
1006 va_list ap;
1007 CPUState *cpu;
1009 va_start(ap, fmt);
1010 fprintf(stderr, "qemu: hardware error: ");
1011 vfprintf(stderr, fmt, ap);
1012 fprintf(stderr, "\n");
1013 CPU_FOREACH(cpu) {
1014 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
1015 cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
1017 va_end(ap);
1018 abort();
1021 void cpu_synchronize_all_states(void)
1023 CPUState *cpu;
1025 CPU_FOREACH(cpu) {
1026 cpu_synchronize_state(cpu);
1027 /* TODO: move to cpu_synchronize_state() */
1028 if (hvf_enabled()) {
1029 hvf_cpu_synchronize_state(cpu);
1034 void cpu_synchronize_all_post_reset(void)
1036 CPUState *cpu;
1038 CPU_FOREACH(cpu) {
1039 cpu_synchronize_post_reset(cpu);
1040 /* TODO: move to cpu_synchronize_post_reset() */
1041 if (hvf_enabled()) {
1042 hvf_cpu_synchronize_post_reset(cpu);
1047 void cpu_synchronize_all_post_init(void)
1049 CPUState *cpu;
1051 CPU_FOREACH(cpu) {
1052 cpu_synchronize_post_init(cpu);
1053 /* TODO: move to cpu_synchronize_post_init() */
1054 if (hvf_enabled()) {
1055 hvf_cpu_synchronize_post_init(cpu);
1060 void cpu_synchronize_all_pre_loadvm(void)
1062 CPUState *cpu;
1064 CPU_FOREACH(cpu) {
1065 cpu_synchronize_pre_loadvm(cpu);
1069 static int do_vm_stop(RunState state, bool send_stop)
1071 int ret = 0;
1073 if (runstate_is_running()) {
1074 cpu_disable_ticks();
1075 pause_all_vcpus();
1076 runstate_set(state);
1077 vm_state_notify(0, state);
1078 if (send_stop) {
1079 qapi_event_send_stop();
1083 bdrv_drain_all();
1084 replay_disable_events();
1085 ret = bdrv_flush_all();
1087 return ret;
1090 /* Special vm_stop() variant for terminating the process. Historically clients
1091 * did not expect a QMP STOP event and so we need to retain compatibility.
1093 int vm_shutdown(void)
1095 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
1098 static bool cpu_can_run(CPUState *cpu)
1100 if (cpu->stop) {
1101 return false;
1103 if (cpu_is_stopped(cpu)) {
1104 return false;
1106 return true;
1109 static void cpu_handle_guest_debug(CPUState *cpu)
1111 gdb_set_stop_cpu(cpu);
1112 qemu_system_debug_request();
1113 cpu->stopped = true;
1116 #ifdef CONFIG_LINUX
1117 static void sigbus_reraise(void)
1119 sigset_t set;
1120 struct sigaction action;
1122 memset(&action, 0, sizeof(action));
1123 action.sa_handler = SIG_DFL;
1124 if (!sigaction(SIGBUS, &action, NULL)) {
1125 raise(SIGBUS);
1126 sigemptyset(&set);
1127 sigaddset(&set, SIGBUS);
1128 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1130 perror("Failed to re-raise SIGBUS!\n");
1131 abort();
1134 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1136 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1137 sigbus_reraise();
1140 if (current_cpu) {
1141 /* Called asynchronously in VCPU thread. */
1142 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1143 sigbus_reraise();
1145 } else {
1146 /* Called synchronously (via signalfd) in main thread. */
1147 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1148 sigbus_reraise();
1153 static void qemu_init_sigbus(void)
1155 struct sigaction action;
1157 memset(&action, 0, sizeof(action));
1158 action.sa_flags = SA_SIGINFO;
1159 action.sa_sigaction = sigbus_handler;
1160 sigaction(SIGBUS, &action, NULL);
1162 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1164 #else /* !CONFIG_LINUX */
1165 static void qemu_init_sigbus(void)
1168 #endif /* !CONFIG_LINUX */
1170 static QemuMutex qemu_global_mutex;
1172 static QemuThread io_thread;
1174 /* cpu creation */
1175 static QemuCond qemu_cpu_cond;
1176 /* system init */
1177 static QemuCond qemu_pause_cond;
1179 void qemu_init_cpu_loop(void)
1181 qemu_init_sigbus();
1182 qemu_cond_init(&qemu_cpu_cond);
1183 qemu_cond_init(&qemu_pause_cond);
1184 qemu_mutex_init(&qemu_global_mutex);
1186 qemu_thread_get_self(&io_thread);
1189 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1191 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1194 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1196 if (kvm_destroy_vcpu(cpu) < 0) {
1197 error_report("kvm_destroy_vcpu failed");
1198 exit(EXIT_FAILURE);
1202 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1206 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1208 g_assert(qemu_cpu_is_self(cpu));
1209 cpu->stop = false;
1210 cpu->stopped = true;
1211 if (exit) {
1212 cpu_exit(cpu);
1214 qemu_cond_broadcast(&qemu_pause_cond);
1217 static void qemu_wait_io_event_common(CPUState *cpu)
1219 atomic_mb_set(&cpu->thread_kicked, false);
1220 if (cpu->stop) {
1221 qemu_cpu_stop(cpu, false);
1223 process_queued_cpu_work(cpu);
1226 static void qemu_tcg_rr_wait_io_event(void)
1228 CPUState *cpu;
1230 while (all_cpu_threads_idle()) {
1231 stop_tcg_kick_timer();
1232 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1235 start_tcg_kick_timer();
1237 CPU_FOREACH(cpu) {
1238 qemu_wait_io_event_common(cpu);
1242 static void qemu_wait_io_event(CPUState *cpu)
1244 while (cpu_thread_is_idle(cpu)) {
1245 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1248 #ifdef _WIN32
1249 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1250 if (!tcg_enabled()) {
1251 SleepEx(0, TRUE);
1253 #endif
1254 qemu_wait_io_event_common(cpu);
1257 static void *qemu_kvm_cpu_thread_fn(void *arg)
1259 CPUState *cpu = arg;
1260 int r;
1262 rcu_register_thread();
1264 qemu_mutex_lock_iothread();
1265 qemu_thread_get_self(cpu->thread);
1266 cpu->thread_id = qemu_get_thread_id();
1267 cpu->can_do_io = 1;
1268 current_cpu = cpu;
1270 r = kvm_init_vcpu(cpu);
1271 if (r < 0) {
1272 error_report("kvm_init_vcpu failed: %s", strerror(-r));
1273 exit(1);
1276 kvm_init_cpu_signals(cpu);
1278 /* signal CPU creation */
1279 cpu->created = true;
1280 qemu_cond_signal(&qemu_cpu_cond);
1281 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1283 do {
1284 if (cpu_can_run(cpu)) {
1285 r = kvm_cpu_exec(cpu);
1286 if (r == EXCP_DEBUG) {
1287 cpu_handle_guest_debug(cpu);
1290 qemu_wait_io_event(cpu);
1291 } while (!cpu->unplug || cpu_can_run(cpu));
1293 qemu_kvm_destroy_vcpu(cpu);
1294 cpu->created = false;
1295 qemu_cond_signal(&qemu_cpu_cond);
1296 qemu_mutex_unlock_iothread();
1297 rcu_unregister_thread();
1298 return NULL;
1301 static void *qemu_dummy_cpu_thread_fn(void *arg)
1303 #ifdef _WIN32
1304 error_report("qtest is not supported under Windows");
1305 exit(1);
1306 #else
1307 CPUState *cpu = arg;
1308 sigset_t waitset;
1309 int r;
1311 rcu_register_thread();
1313 qemu_mutex_lock_iothread();
1314 qemu_thread_get_self(cpu->thread);
1315 cpu->thread_id = qemu_get_thread_id();
1316 cpu->can_do_io = 1;
1317 current_cpu = cpu;
1319 sigemptyset(&waitset);
1320 sigaddset(&waitset, SIG_IPI);
1322 /* signal CPU creation */
1323 cpu->created = true;
1324 qemu_cond_signal(&qemu_cpu_cond);
1325 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1327 do {
1328 qemu_mutex_unlock_iothread();
1329 do {
1330 int sig;
1331 r = sigwait(&waitset, &sig);
1332 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1333 if (r == -1) {
1334 perror("sigwait");
1335 exit(1);
1337 qemu_mutex_lock_iothread();
1338 qemu_wait_io_event(cpu);
1339 } while (!cpu->unplug);
1341 qemu_mutex_unlock_iothread();
1342 rcu_unregister_thread();
1343 return NULL;
1344 #endif
1347 static int64_t tcg_get_icount_limit(void)
1349 int64_t deadline;
1351 if (replay_mode != REPLAY_MODE_PLAY) {
1352 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1354 /* Maintain prior (possibly buggy) behaviour where if no deadline
1355 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1356 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1357 * nanoseconds.
1359 if ((deadline < 0) || (deadline > INT32_MAX)) {
1360 deadline = INT32_MAX;
1363 return qemu_icount_round(deadline);
1364 } else {
1365 return replay_get_instructions();
1369 static void handle_icount_deadline(void)
1371 assert(qemu_in_vcpu_thread());
1372 if (use_icount) {
1373 int64_t deadline =
1374 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1376 if (deadline == 0) {
1377 /* Wake up other AioContexts. */
1378 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1379 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1384 static void prepare_icount_for_run(CPUState *cpu)
1386 if (use_icount) {
1387 int insns_left;
1389 /* These should always be cleared by process_icount_data after
1390 * each vCPU execution. However u16.high can be raised
1391 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1393 g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
1394 g_assert(cpu->icount_extra == 0);
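/* Split the budget: at most 0xffff instructions go into the 16-bit icount
 * decrementer; the remainder is held in icount_extra until the decrementer
 * needs refilling. */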
1396 cpu->icount_budget = tcg_get_icount_limit();
1397 insns_left = MIN(0xffff, cpu->icount_budget);
1398 cpu_neg(cpu)->icount_decr.u16.low = insns_left;
1399 cpu->icount_extra = cpu->icount_budget - insns_left;
1401 replay_mutex_lock();
1405 static void process_icount_data(CPUState *cpu)
1407 if (use_icount) {
1408 /* Account for executed instructions */
1409 cpu_update_icount(cpu);
1411 /* Reset the counters */
1412 cpu_neg(cpu)->icount_decr.u16.low = 0;
1413 cpu->icount_extra = 0;
1414 cpu->icount_budget = 0;
1416 replay_account_executed_instructions();
1418 replay_mutex_unlock();
1423 static int tcg_cpu_exec(CPUState *cpu)
1425 int ret;
1426 #ifdef CONFIG_PROFILER
1427 int64_t ti;
1428 #endif
1430 assert(tcg_enabled());
1431 #ifdef CONFIG_PROFILER
1432 ti = profile_getclock();
1433 #endif
1434 cpu_exec_start(cpu);
1435 ret = cpu_exec(cpu);
1436 cpu_exec_end(cpu);
1437 #ifdef CONFIG_PROFILER
1438 atomic_set(&tcg_ctx->prof.cpu_exec_time,
1439 tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
1440 #endif
1441 return ret;
1444 /* Destroy any remaining vCPUs which have been unplugged and have
1445 * finished running
1447 static void deal_with_unplugged_cpus(void)
1449 CPUState *cpu;
1451 CPU_FOREACH(cpu) {
1452 if (cpu->unplug && !cpu_can_run(cpu)) {
1453 qemu_tcg_destroy_vcpu(cpu);
1454 cpu->created = false;
1455 qemu_cond_signal(&qemu_cpu_cond);
1456 break;
1461 /* Single-threaded TCG
1463 * In the single-threaded case each vCPU is simulated in turn. If
1464 * there is more than a single vCPU we create a simple timer to kick
1465 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1466 * This is done explicitly rather than relying on side-effects
1467 * elsewhere.
1470 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1472 CPUState *cpu = arg;
1474 assert(tcg_enabled());
1475 rcu_register_thread();
1476 tcg_register_thread();
1478 qemu_mutex_lock_iothread();
1479 qemu_thread_get_self(cpu->thread);
1481 cpu->thread_id = qemu_get_thread_id();
1482 cpu->created = true;
1483 cpu->can_do_io = 1;
1484 qemu_cond_signal(&qemu_cpu_cond);
1485 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1487 /* wait for initial kick-off after machine start */
1488 while (first_cpu->stopped) {
1489 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1491 /* process any pending work */
1492 CPU_FOREACH(cpu) {
1493 current_cpu = cpu;
1494 qemu_wait_io_event_common(cpu);
1498 start_tcg_kick_timer();
1500 cpu = first_cpu;
1502 /* process any pending work */
1503 cpu->exit_request = 1;
1505 while (1) {
1506 qemu_mutex_unlock_iothread();
1507 replay_mutex_lock();
1508 qemu_mutex_lock_iothread();
1509 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1510 qemu_account_warp_timer();
1512 /* Run the timers here. This is much more efficient than
1513 * waking up the I/O thread and waiting for completion.
1515 handle_icount_deadline();
1517 replay_mutex_unlock();
1519 if (!cpu) {
1520 cpu = first_cpu;
1523 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1525 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1526 current_cpu = cpu;
1528 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1529 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1531 if (cpu_can_run(cpu)) {
1532 int r;
1534 qemu_mutex_unlock_iothread();
1535 prepare_icount_for_run(cpu);
1537 r = tcg_cpu_exec(cpu);
1539 process_icount_data(cpu);
1540 qemu_mutex_lock_iothread();
1542 if (r == EXCP_DEBUG) {
1543 cpu_handle_guest_debug(cpu);
1544 break;
1545 } else if (r == EXCP_ATOMIC) {
1546 qemu_mutex_unlock_iothread();
1547 cpu_exec_step_atomic(cpu);
1548 qemu_mutex_lock_iothread();
1549 break;
1551 } else if (cpu->stop) {
1552 if (cpu->unplug) {
1553 cpu = CPU_NEXT(cpu);
1555 break;
1558 cpu = CPU_NEXT(cpu);
1559 } /* while (cpu && !cpu->exit_request).. */
1561 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1562 atomic_set(&tcg_current_rr_cpu, NULL);
1564 if (cpu && cpu->exit_request) {
1565 atomic_mb_set(&cpu->exit_request, 0);
1568 if (use_icount && all_cpu_threads_idle()) {
1570 * When all cpus are sleeping (e.g. in WFI), to avoid a deadlock
1571 * in the main_loop, wake it up in order to start the warp timer.
1573 qemu_notify_event();
1576 qemu_tcg_rr_wait_io_event();
1577 deal_with_unplugged_cpus();
1580 rcu_unregister_thread();
1581 return NULL;
1584 static void *qemu_hax_cpu_thread_fn(void *arg)
1586 CPUState *cpu = arg;
1587 int r;
1589 rcu_register_thread();
1590 qemu_mutex_lock_iothread();
1591 qemu_thread_get_self(cpu->thread);
1593 cpu->thread_id = qemu_get_thread_id();
1594 cpu->created = true;
1595 cpu->halted = 0;
1596 current_cpu = cpu;
1598 hax_init_vcpu(cpu);
1599 qemu_cond_signal(&qemu_cpu_cond);
1600 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1602 do {
1603 if (cpu_can_run(cpu)) {
1604 r = hax_smp_cpu_exec(cpu);
1605 if (r == EXCP_DEBUG) {
1606 cpu_handle_guest_debug(cpu);
1610 qemu_wait_io_event(cpu);
1611 } while (!cpu->unplug || cpu_can_run(cpu));
1612 rcu_unregister_thread();
1613 return NULL;
1616 /* The HVF-specific vCPU thread function. This one should only run when the host
1617 * CPU supports the VMX "unrestricted guest" feature. */
1618 static void *qemu_hvf_cpu_thread_fn(void *arg)
1620 CPUState *cpu = arg;
1622 int r;
1624 assert(hvf_enabled());
1626 rcu_register_thread();
1628 qemu_mutex_lock_iothread();
1629 qemu_thread_get_self(cpu->thread);
1631 cpu->thread_id = qemu_get_thread_id();
1632 cpu->can_do_io = 1;
1633 current_cpu = cpu;
1635 hvf_init_vcpu(cpu);
1637 /* signal CPU creation */
1638 cpu->created = true;
1639 qemu_cond_signal(&qemu_cpu_cond);
1640 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1642 do {
1643 if (cpu_can_run(cpu)) {
1644 r = hvf_vcpu_exec(cpu);
1645 if (r == EXCP_DEBUG) {
1646 cpu_handle_guest_debug(cpu);
1649 qemu_wait_io_event(cpu);
1650 } while (!cpu->unplug || cpu_can_run(cpu));
1652 hvf_vcpu_destroy(cpu);
1653 cpu->created = false;
1654 qemu_cond_signal(&qemu_cpu_cond);
1655 qemu_mutex_unlock_iothread();
1656 rcu_unregister_thread();
1657 return NULL;
1660 static void *qemu_whpx_cpu_thread_fn(void *arg)
1662 CPUState *cpu = arg;
1663 int r;
1665 rcu_register_thread();
1667 qemu_mutex_lock_iothread();
1668 qemu_thread_get_self(cpu->thread);
1669 cpu->thread_id = qemu_get_thread_id();
1670 current_cpu = cpu;
1672 r = whpx_init_vcpu(cpu);
1673 if (r < 0) {
1674 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1675 exit(1);
1678 /* signal CPU creation */
1679 cpu->created = true;
1680 qemu_cond_signal(&qemu_cpu_cond);
1681 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1683 do {
1684 if (cpu_can_run(cpu)) {
1685 r = whpx_vcpu_exec(cpu);
1686 if (r == EXCP_DEBUG) {
1687 cpu_handle_guest_debug(cpu);
1690 while (cpu_thread_is_idle(cpu)) {
1691 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1693 qemu_wait_io_event_common(cpu);
1694 } while (!cpu->unplug || cpu_can_run(cpu));
1696 whpx_destroy_vcpu(cpu);
1697 cpu->created = false;
1698 qemu_cond_signal(&qemu_cpu_cond);
1699 qemu_mutex_unlock_iothread();
1700 rcu_unregister_thread();
1701 return NULL;
1704 #ifdef _WIN32
1705 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1708 #endif
1710 /* Multi-threaded TCG
1712 * In the multi-threaded case each vCPU has its own thread. The TLS
1713 * variable current_cpu can be used deep in the code to find the
1714 * current CPUState for a given thread.
1717 static void *qemu_tcg_cpu_thread_fn(void *arg)
1719 CPUState *cpu = arg;
1721 assert(tcg_enabled());
1722 g_assert(!use_icount);
1724 rcu_register_thread();
1725 tcg_register_thread();
1727 qemu_mutex_lock_iothread();
1728 qemu_thread_get_self(cpu->thread);
1730 cpu->thread_id = qemu_get_thread_id();
1731 cpu->created = true;
1732 cpu->can_do_io = 1;
1733 current_cpu = cpu;
1734 qemu_cond_signal(&qemu_cpu_cond);
1735 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1737 /* process any pending work */
1738 cpu->exit_request = 1;
1740 do {
1741 if (cpu_can_run(cpu)) {
1742 int r;
1743 qemu_mutex_unlock_iothread();
1744 r = tcg_cpu_exec(cpu);
1745 qemu_mutex_lock_iothread();
1746 switch (r) {
1747 case EXCP_DEBUG:
1748 cpu_handle_guest_debug(cpu);
1749 break;
1750 case EXCP_HALTED:
1751 /* during start-up the vCPU is reset and the thread is
1752 * kicked several times. If we don't ensure we go back
1753 * to sleep in the halted state, we won't cleanly
1754 * start up when the vCPU is enabled.
1756 * cpu->halted should ensure we sleep in wait_io_event
1758 g_assert(cpu->halted);
1759 break;
1760 case EXCP_ATOMIC:
1761 qemu_mutex_unlock_iothread();
1762 cpu_exec_step_atomic(cpu);
1763 qemu_mutex_lock_iothread();
1764 default:
1765 /* Ignore everything else? */
1766 break;
1770 atomic_mb_set(&cpu->exit_request, 0);
1771 qemu_wait_io_event(cpu);
1772 } while (!cpu->unplug || cpu_can_run(cpu));
1774 qemu_tcg_destroy_vcpu(cpu);
1775 cpu->created = false;
1776 qemu_cond_signal(&qemu_cpu_cond);
1777 qemu_mutex_unlock_iothread();
1778 rcu_unregister_thread();
1779 return NULL;
1782 static void qemu_cpu_kick_thread(CPUState *cpu)
1784 #ifndef _WIN32
1785 int err;
1787 if (cpu->thread_kicked) {
1788 return;
1790 cpu->thread_kicked = true;
1791 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1792 if (err && err != ESRCH) {
1793 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1794 exit(1);
1796 #else /* _WIN32 */
1797 if (!qemu_cpu_is_self(cpu)) {
1798 if (whpx_enabled()) {
1799 whpx_vcpu_kick(cpu);
1800 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1801 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1802 __func__, GetLastError());
1803 exit(1);
1806 #endif
1809 void qemu_cpu_kick(CPUState *cpu)
1811 qemu_cond_broadcast(cpu->halt_cond);
1812 if (tcg_enabled()) {
1813 cpu_exit(cpu);
1814 /* NOP unless doing single-thread RR */
1815 qemu_cpu_kick_rr_cpu();
1816 } else {
1817 if (hax_enabled()) {
1819 * FIXME: race condition with the exit_request check in
1820 * hax_vcpu_hax_exec
1822 cpu->exit_request = 1;
1824 qemu_cpu_kick_thread(cpu);
1828 void qemu_cpu_kick_self(void)
1830 assert(current_cpu);
1831 qemu_cpu_kick_thread(current_cpu);
1834 bool qemu_cpu_is_self(CPUState *cpu)
1836 return qemu_thread_is_self(cpu->thread);
1839 bool qemu_in_vcpu_thread(void)
1841 return current_cpu && qemu_cpu_is_self(current_cpu);
1844 static __thread bool iothread_locked = false;
1846 bool qemu_mutex_iothread_locked(void)
1848 return iothread_locked;
1852 * The BQL is taken from so many places that it is worth profiling the
1853 * callers directly, instead of funneling them all through a single function.
1855 void qemu_mutex_lock_iothread_impl(const char *file, int line)
1857 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1859 g_assert(!qemu_mutex_iothread_locked());
1860 bql_lock(&qemu_global_mutex, file, line);
1861 iothread_locked = true;
1864 void qemu_mutex_unlock_iothread(void)
1866 g_assert(qemu_mutex_iothread_locked());
1867 iothread_locked = false;
1868 qemu_mutex_unlock(&qemu_global_mutex);
1871 static bool all_vcpus_paused(void)
1873 CPUState *cpu;
1875 CPU_FOREACH(cpu) {
1876 if (!cpu->stopped) {
1877 return false;
1881 return true;
1884 void pause_all_vcpus(void)
1886 CPUState *cpu;
1888 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1889 CPU_FOREACH(cpu) {
1890 if (qemu_cpu_is_self(cpu)) {
1891 qemu_cpu_stop(cpu, true);
1892 } else {
1893 cpu->stop = true;
1894 qemu_cpu_kick(cpu);
1898 /* We need to drop the replay_lock so any vCPU threads woken up
1899 * can finish their replay tasks
1901 replay_mutex_unlock();
1903 while (!all_vcpus_paused()) {
1904 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1905 CPU_FOREACH(cpu) {
1906 qemu_cpu_kick(cpu);
1910 qemu_mutex_unlock_iothread();
1911 replay_mutex_lock();
1912 qemu_mutex_lock_iothread();
1915 void cpu_resume(CPUState *cpu)
1917 cpu->stop = false;
1918 cpu->stopped = false;
1919 qemu_cpu_kick(cpu);
1922 void resume_all_vcpus(void)
1924 CPUState *cpu;
1926 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1927 CPU_FOREACH(cpu) {
1928 cpu_resume(cpu);
1932 void cpu_remove_sync(CPUState *cpu)
1934 cpu->stop = true;
1935 cpu->unplug = true;
1936 qemu_cpu_kick(cpu);
1937 qemu_mutex_unlock_iothread();
1938 qemu_thread_join(cpu->thread);
1939 qemu_mutex_lock_iothread();
1942 /* Size of temporary buffers used when forming a vCPU thread name */
1943 #define VCPU_THREAD_NAME_SIZE 16
1945 static void qemu_tcg_init_vcpu(CPUState *cpu)
1947 char thread_name[VCPU_THREAD_NAME_SIZE];
1948 static QemuCond *single_tcg_halt_cond;
1949 static QemuThread *single_tcg_cpu_thread;
1950 static int tcg_region_inited;
1952 assert(tcg_enabled());
1954 * Initialize TCG regions--once. Now is a good time, because:
1955 * (1) TCG's init context, prologue and target globals have been set up.
1956 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1957 * -accel flag is processed, so the check doesn't work then).
1959 if (!tcg_region_inited) {
1960 tcg_region_inited = 1;
1961 tcg_region_init();
1964 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1965 cpu->thread = g_malloc0(sizeof(QemuThread));
1966 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1967 qemu_cond_init(cpu->halt_cond);
1969 if (qemu_tcg_mttcg_enabled()) {
1970 /* create a thread per vCPU with TCG (MTTCG) */
1971 parallel_cpus = true;
1972 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1973 cpu->cpu_index);
1975 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1976 cpu, QEMU_THREAD_JOINABLE);
1978 } else {
1979 /* share a single thread for all cpus with TCG */
1980 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1981 qemu_thread_create(cpu->thread, thread_name,
1982 qemu_tcg_rr_cpu_thread_fn,
1983 cpu, QEMU_THREAD_JOINABLE);
1985 single_tcg_halt_cond = cpu->halt_cond;
1986 single_tcg_cpu_thread = cpu->thread;
1988 #ifdef _WIN32
1989 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1990 #endif
1991 } else {
1992 /* For non-MTTCG cases we share the thread */
1993 cpu->thread = single_tcg_cpu_thread;
1994 cpu->halt_cond = single_tcg_halt_cond;
1995 cpu->thread_id = first_cpu->thread_id;
1996 cpu->can_do_io = 1;
1997 cpu->created = true;
2001 static void qemu_hax_start_vcpu(CPUState *cpu)
2003 char thread_name[VCPU_THREAD_NAME_SIZE];
2005 cpu->thread = g_malloc0(sizeof(QemuThread));
2006 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2007 qemu_cond_init(cpu->halt_cond);
2009 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
2010 cpu->cpu_index);
2011 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
2012 cpu, QEMU_THREAD_JOINABLE);
2013 #ifdef _WIN32
2014 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2015 #endif
2018 static void qemu_kvm_start_vcpu(CPUState *cpu)
2020 char thread_name[VCPU_THREAD_NAME_SIZE];
2022 cpu->thread = g_malloc0(sizeof(QemuThread));
2023 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2024 qemu_cond_init(cpu->halt_cond);
2025 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2026 cpu->cpu_index);
2027 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
2028 cpu, QEMU_THREAD_JOINABLE);
2031 static void qemu_hvf_start_vcpu(CPUState *cpu)
2033 char thread_name[VCPU_THREAD_NAME_SIZE];
2035 /* HVF currently does not support TCG, and only runs in
2036 * unrestricted-guest mode. */
2037 assert(hvf_enabled());
2039 cpu->thread = g_malloc0(sizeof(QemuThread));
2040 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2041 qemu_cond_init(cpu->halt_cond);
2043 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2044 cpu->cpu_index);
2045 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2046 cpu, QEMU_THREAD_JOINABLE);
2049 static void qemu_whpx_start_vcpu(CPUState *cpu)
2051 char thread_name[VCPU_THREAD_NAME_SIZE];
2053 cpu->thread = g_malloc0(sizeof(QemuThread));
2054 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2055 qemu_cond_init(cpu->halt_cond);
2056 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2057 cpu->cpu_index);
2058 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2059 cpu, QEMU_THREAD_JOINABLE);
2060 #ifdef _WIN32
2061 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2062 #endif
2065 static void qemu_dummy_start_vcpu(CPUState *cpu)
2067 char thread_name[VCPU_THREAD_NAME_SIZE];
2069 cpu->thread = g_malloc0(sizeof(QemuThread));
2070 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2071 qemu_cond_init(cpu->halt_cond);
2072 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2073 cpu->cpu_index);
2074 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2075 QEMU_THREAD_JOINABLE);
2078 void qemu_init_vcpu(CPUState *cpu)
2080 cpu->nr_cores = smp_cores;
2081 cpu->nr_threads = smp_threads;
2082 cpu->stopped = true;
2083 cpu->random_seed = qemu_guest_random_seed_thread_part1();
2085 if (!cpu->as) {
2086 /* If the target cpu hasn't set up any address spaces itself,
2087 * give it the default one.
2089 cpu->num_ases = 1;
2090 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2093 if (kvm_enabled()) {
2094 qemu_kvm_start_vcpu(cpu);
2095 } else if (hax_enabled()) {
2096 qemu_hax_start_vcpu(cpu);
2097 } else if (hvf_enabled()) {
2098 qemu_hvf_start_vcpu(cpu);
2099 } else if (tcg_enabled()) {
2100 qemu_tcg_init_vcpu(cpu);
2101 } else if (whpx_enabled()) {
2102 qemu_whpx_start_vcpu(cpu);
2103 } else {
2104 qemu_dummy_start_vcpu(cpu);
2107 while (!cpu->created) {
2108 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2112 void cpu_stop_current(void)
2114 if (current_cpu) {
2115 current_cpu->stop = true;
2116 cpu_exit(current_cpu);
2120 int vm_stop(RunState state)
2122 if (qemu_in_vcpu_thread()) {
2123 qemu_system_vmstop_request_prepare();
2124 qemu_system_vmstop_request(state);
2126 * FIXME: should not return to device code in case
2127 * vm_stop() has been requested.
2129 cpu_stop_current();
2130 return 0;
2133 return do_vm_stop(state, true);
2137 * Prepare for (re)starting the VM.
2138 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2139 * running or in case of an error condition), 0 otherwise.
2141 int vm_prepare_start(void)
2143 RunState requested;
2145 qemu_vmstop_requested(&requested);
2146 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2147 return -1;
2150 /* Ensure that a STOP/RESUME pair of events is emitted if a
2151 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2152 * example, according to the documentation, is always followed by
2153 * the STOP event.
2155 if (runstate_is_running()) {
2156 qapi_event_send_stop();
2157 qapi_event_send_resume();
2158 return -1;
2161 /* We are sending this now, but the CPUs will be resumed shortly */
2162 qapi_event_send_resume();
2164 replay_enable_events();
2165 cpu_enable_ticks();
2166 runstate_set(RUN_STATE_RUNNING);
2167 vm_state_notify(1, RUN_STATE_RUNNING);
2168 return 0;
2171 void vm_start(void)
2173 if (!vm_prepare_start()) {
2174 resume_all_vcpus();
2178 /* Does a state transition even if the VM is already stopped;
2179 the current state is forgotten forever. */
2180 int vm_stop_force_state(RunState state)
2182 if (runstate_is_running()) {
2183 return vm_stop(state);
2184 } else {
2185 runstate_set(state);
2187 bdrv_drain_all();
2188 /* Make sure to return an error if the flush in a previous vm_stop()
2189 * failed. */
2190 return bdrv_flush_all();
2194 void list_cpus(const char *optarg)
2196 /* XXX: implement xxx_cpu_list for targets that still miss it */
2197 #if defined(cpu_list)
2198 cpu_list();
2199 #endif
CpuInfoList *qmp_query_cpus(Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_RISCV)
        RISCVCPU *riscv_cpu = RISCV_CPU(cpu);
        CPURISCVState *env = &riscv_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#elif defined(TARGET_S390X)
        S390CPU *s390_cpu = S390_CPU(cpu);
        CPUS390XState *env = &s390_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#elif defined(TARGET_S390X)
        info->value->arch = CPU_INFO_ARCH_S390;
        info->value->u.s390.cpu_state = env->cpu_state;
#elif defined(TARGET_RISCV)
        info->value->arch = CPU_INFO_ARCH_RISCV;
        info->value->u.riscv.pc = env->pc;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif
        info->value->has_props = !!mc->cpu_index_to_instance_props;
        if (info->value->has_props) {
            CpuInstanceProperties *props;
            props = g_malloc0(sizeof(*props));
            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
            info->value->props = props;
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

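/* Map a @SysEmuTarget value onto the coarser @CpuInfoArch enumeration. */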
static CpuInfoArch sysemu_target_to_cpuinfo_arch(SysEmuTarget target)
{
    /*
     * The @SysEmuTarget -> @CpuInfoArch mapping below is based on the
     * TARGET_ARCH -> TARGET_BASE_ARCH mapping in the "configure" script.
     */
    switch (target) {
    case SYS_EMU_TARGET_I386:
    case SYS_EMU_TARGET_X86_64:
        return CPU_INFO_ARCH_X86;

    case SYS_EMU_TARGET_PPC:
    case SYS_EMU_TARGET_PPC64:
        return CPU_INFO_ARCH_PPC;

    case SYS_EMU_TARGET_SPARC:
    case SYS_EMU_TARGET_SPARC64:
        return CPU_INFO_ARCH_SPARC;

    case SYS_EMU_TARGET_MIPS:
    case SYS_EMU_TARGET_MIPSEL:
    case SYS_EMU_TARGET_MIPS64:
    case SYS_EMU_TARGET_MIPS64EL:
        return CPU_INFO_ARCH_MIPS;

    case SYS_EMU_TARGET_TRICORE:
        return CPU_INFO_ARCH_TRICORE;

    case SYS_EMU_TARGET_S390X:
        return CPU_INFO_ARCH_S390;

    case SYS_EMU_TARGET_RISCV32:
    case SYS_EMU_TARGET_RISCV64:
        return CPU_INFO_ARCH_RISCV;

    default:
        return CPU_INFO_ARCH_OTHER;
    }
}

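/* Fill in the s390x-specific cpu_state; must only be called for s390x targets. */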
static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu)
{
#ifdef TARGET_S390X
    S390CPU *s390_cpu = S390_CPU(cpu);
    CPUS390XState *env = &s390_cpu->env;

    info->cpu_state = env->cpu_state;
#else
    abort();
#endif
}

/*
 * fast means: we NEVER interrupt vCPU threads to retrieve
 * information from KVM.
 */
CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    CpuInfoFastList *head = NULL, *cur_item = NULL;
    SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, TARGET_NAME,
                                          -1, &error_abort);
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoFastList *info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));

        info->value->cpu_index = cpu->cpu_index;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;

        info->value->has_props = !!mc->cpu_index_to_instance_props;
        if (info->value->has_props) {
            CpuInstanceProperties *props;
            props = g_malloc0(sizeof(*props));
            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
            info->value->props = props;
        }

        info->value->arch = sysemu_target_to_cpuinfo_arch(target);
        info->value->target = target;
        if (target == SYS_EMU_TARGET_S390X) {
            cpustate_to_cpuinfo_s390(&info->value->u.s390x, cpu);
        }

        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

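/*
 * QMP 'memsave' handler: write @size bytes of guest virtual memory,
 * starting at @addr and translated through the given vCPU, to @filename.
 */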
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

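/*
 * QMP 'pmemsave' handler: write @size bytes of guest physical memory,
 * starting at @addr, to @filename.
 */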
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

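/* QMP 'inject-nmi' handler: deliver an NMI via the machine's NMI interface. */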
void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

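/*
 * Report how far the guest's icount-based clock has drifted from the host
 * clock; only meaningful when icount is in use.
 */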
void dump_drift_info(void)
{
    if (!use_icount) {
        return;
    }

    qemu_printf("Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount()) / SCALE_MS);
    if (icount_align_option) {
        qemu_printf("Max guest delay %"PRIi64" ms\n",
                    -max_delay / SCALE_MS);
        qemu_printf("Max guest advance %"PRIi64" ms\n",
                    max_advance / SCALE_MS);
    } else {
        qemu_printf("Max guest delay NA\n");
        qemu_printf("Max guest advance NA\n");