net/filter-rewriter.c: Fix coverity static analysis issue
[qemu.git] / cpus.c
bloba2b33ccb293e50bd977d700e32c4a8e09f61e1f3
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "qemu/osdep.h"
26 #include "qemu/config-file.h"
27 #include "cpu.h"
28 #include "monitor/monitor.h"
29 #include "qapi/error.h"
30 #include "qapi/qapi-commands-misc.h"
31 #include "qapi/qapi-events-run-state.h"
32 #include "qapi/qmp/qerror.h"
33 #include "qemu/error-report.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/block-backend.h"
36 #include "exec/gdbstub.h"
37 #include "sysemu/dma.h"
38 #include "sysemu/hw_accel.h"
39 #include "sysemu/kvm.h"
40 #include "sysemu/hax.h"
41 #include "sysemu/hvf.h"
42 #include "sysemu/whpx.h"
43 #include "exec/exec-all.h"
45 #include "qemu/thread.h"
46 #include "sysemu/cpus.h"
47 #include "sysemu/qtest.h"
48 #include "qemu/main-loop.h"
49 #include "qemu/option.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/seqlock.h"
52 #include "tcg.h"
53 #include "hw/nmi.h"
54 #include "sysemu/replay.h"
55 #include "hw/boards.h"
57 #ifdef CONFIG_LINUX
59 #include <sys/prctl.h>
61 #ifndef PR_MCE_KILL
62 #define PR_MCE_KILL 33
63 #endif
65 #ifndef PR_MCE_KILL_SET
66 #define PR_MCE_KILL_SET 1
67 #endif
69 #ifndef PR_MCE_KILL_EARLY
70 #define PR_MCE_KILL_EARLY 1
71 #endif
73 #endif /* CONFIG_LINUX */
75 int64_t max_delay;
76 int64_t max_advance;
78 /* vcpu throttling controls */
79 static QEMUTimer *throttle_timer;
80 static unsigned int throttle_percentage;
82 #define CPU_THROTTLE_PCT_MIN 1
83 #define CPU_THROTTLE_PCT_MAX 99
84 #define CPU_THROTTLE_TIMESLICE_NS 10000000
86 bool cpu_is_stopped(CPUState *cpu)
88 return cpu->stopped || !runstate_is_running();
91 static bool cpu_thread_is_idle(CPUState *cpu)
93 if (cpu->stop || cpu->queued_work_first) {
94 return false;
96 if (cpu_is_stopped(cpu)) {
97 return true;
99 if (!cpu->halted || cpu_has_work(cpu) ||
100 kvm_halt_in_kernel()) {
101 return false;
103 return true;
106 static bool all_cpu_threads_idle(void)
108 CPUState *cpu;
110 CPU_FOREACH(cpu) {
111 if (!cpu_thread_is_idle(cpu)) {
112 return false;
115 return true;
/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10
127 typedef struct TimersState {
128 /* Protected by BQL. */
129 int64_t cpu_ticks_prev;
130 int64_t cpu_ticks_offset;
132 /* Protect fields that can be respectively read outside the
133 * BQL, and written from multiple threads.
135 QemuSeqLock vm_clock_seqlock;
136 QemuSpin vm_clock_lock;
138 int16_t cpu_ticks_enabled;
140 /* Conversion factor from emulated instructions to virtual clock ticks. */
141 int16_t icount_time_shift;
143 /* Compensate for varying guest execution speed. */
144 int64_t qemu_icount_bias;
146 int64_t vm_clock_warp_start;
147 int64_t cpu_clock_offset;
149 /* Only written by TCG thread */
150 int64_t qemu_icount;
152 /* for adjusting icount */
153 QEMUTimer *icount_rt_timer;
154 QEMUTimer *icount_vm_timer;
155 QEMUTimer *icount_warp_timer;
156 } TimersState;
158 static TimersState timers_state;
159 bool mttcg_enabled;
/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *   - atomic instructions
 *   - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

/*
 * Return true when every ordering bit required by the guest default
 * memory order is also provided by the host default; false if either
 * macro is not defined.
 */
static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return !(TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO);
#else
    return false;
#endif
}
190 static bool default_mttcg_enabled(void)
192 if (use_icount || TCG_OVERSIZED_GUEST) {
193 return false;
194 } else {
195 #ifdef TARGET_SUPPORTS_MTTCG
196 return check_tcg_memory_orders_compatible();
197 #else
198 return false;
199 #endif
203 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
205 const char *t = qemu_opt_get(opts, "thread");
206 if (t) {
207 if (strcmp(t, "multi") == 0) {
208 if (TCG_OVERSIZED_GUEST) {
209 error_setg(errp, "No MTTCG when guest word size > hosts");
210 } else if (use_icount) {
211 error_setg(errp, "No MTTCG when icount is enabled");
212 } else {
213 #ifndef TARGET_SUPPORTS_MTTCG
214 warn_report("Guest not yet converted to MTTCG - "
215 "you may get unexpected results");
216 #endif
217 if (!check_tcg_memory_orders_compatible()) {
218 warn_report("Guest expects a stronger memory ordering "
219 "than the host provides");
220 error_printf("This may cause strange/hard to debug errors\n");
222 mttcg_enabled = true;
224 } else if (strcmp(t, "single") == 0) {
225 mttcg_enabled = false;
226 } else {
227 error_setg(errp, "Invalid 'thread' setting %s", t);
229 } else {
230 mttcg_enabled = default_mttcg_enabled();
234 /* The current number of executed instructions is based on what we
235 * originally budgeted minus the current state of the decrementing
236 * icount counters in extra/u16.low.
238 static int64_t cpu_get_icount_executed(CPUState *cpu)
240 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
244 * Update the global shared timer_state.qemu_icount to take into
245 * account executed instructions. This is done by the TCG vCPU
246 * thread so the main-loop can see time has moved forward.
248 static void cpu_update_icount_locked(CPUState *cpu)
250 int64_t executed = cpu_get_icount_executed(cpu);
251 cpu->icount_budget -= executed;
253 atomic_set_i64(&timers_state.qemu_icount,
254 timers_state.qemu_icount + executed);
258 * Update the global shared timer_state.qemu_icount to take into
259 * account executed instructions. This is done by the TCG vCPU
260 * thread so the main-loop can see time has moved forward.
262 void cpu_update_icount(CPUState *cpu)
264 seqlock_write_lock(&timers_state.vm_clock_seqlock,
265 &timers_state.vm_clock_lock);
266 cpu_update_icount_locked(cpu);
267 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
268 &timers_state.vm_clock_lock);
271 static int64_t cpu_get_icount_raw_locked(void)
273 CPUState *cpu = current_cpu;
275 if (cpu && cpu->running) {
276 if (!cpu->can_do_io) {
277 error_report("Bad icount read");
278 exit(1);
280 /* Take into account what has run */
281 cpu_update_icount_locked(cpu);
283 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
284 return atomic_read_i64(&timers_state.qemu_icount);
287 static int64_t cpu_get_icount_locked(void)
289 int64_t icount = cpu_get_icount_raw_locked();
290 return atomic_read_i64(&timers_state.qemu_icount_bias) +
291 cpu_icount_to_ns(icount);
294 int64_t cpu_get_icount_raw(void)
296 int64_t icount;
297 unsigned start;
299 do {
300 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
301 icount = cpu_get_icount_raw_locked();
302 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
304 return icount;
307 /* Return the virtual CPU time, based on the instruction counter. */
308 int64_t cpu_get_icount(void)
310 int64_t icount;
311 unsigned start;
313 do {
314 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
315 icount = cpu_get_icount_locked();
316 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
318 return icount;
321 int64_t cpu_icount_to_ns(int64_t icount)
323 return icount << atomic_read(&timers_state.icount_time_shift);
326 static int64_t cpu_get_ticks_locked(void)
328 int64_t ticks = timers_state.cpu_ticks_offset;
329 if (timers_state.cpu_ticks_enabled) {
330 ticks += cpu_get_host_ticks();
333 if (timers_state.cpu_ticks_prev > ticks) {
334 /* Non increasing ticks may happen if the host uses software suspend. */
335 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
336 ticks = timers_state.cpu_ticks_prev;
339 timers_state.cpu_ticks_prev = ticks;
340 return ticks;
343 /* return the time elapsed in VM between vm_start and vm_stop. Unless
344 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
345 * counter.
347 int64_t cpu_get_ticks(void)
349 int64_t ticks;
351 if (use_icount) {
352 return cpu_get_icount();
355 qemu_spin_lock(&timers_state.vm_clock_lock);
356 ticks = cpu_get_ticks_locked();
357 qemu_spin_unlock(&timers_state.vm_clock_lock);
358 return ticks;
361 static int64_t cpu_get_clock_locked(void)
363 int64_t time;
365 time = timers_state.cpu_clock_offset;
366 if (timers_state.cpu_ticks_enabled) {
367 time += get_clock();
370 return time;
373 /* Return the monotonic time elapsed in VM, i.e.,
374 * the time between vm_start and vm_stop
376 int64_t cpu_get_clock(void)
378 int64_t ti;
379 unsigned start;
381 do {
382 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
383 ti = cpu_get_clock_locked();
384 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
386 return ti;
389 /* enable cpu_get_ticks()
390 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
392 void cpu_enable_ticks(void)
394 seqlock_write_lock(&timers_state.vm_clock_seqlock,
395 &timers_state.vm_clock_lock);
396 if (!timers_state.cpu_ticks_enabled) {
397 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
398 timers_state.cpu_clock_offset -= get_clock();
399 timers_state.cpu_ticks_enabled = 1;
401 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
402 &timers_state.vm_clock_lock);
405 /* disable cpu_get_ticks() : the clock is stopped. You must not call
406 * cpu_get_ticks() after that.
407 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
409 void cpu_disable_ticks(void)
411 seqlock_write_lock(&timers_state.vm_clock_seqlock,
412 &timers_state.vm_clock_lock);
413 if (timers_state.cpu_ticks_enabled) {
414 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
415 timers_state.cpu_clock_offset = cpu_get_clock_locked();
416 timers_state.cpu_ticks_enabled = 0;
418 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
419 &timers_state.vm_clock_lock);
422 /* Correlation between real and virtual time is always going to be
423 fairly approximate, so ignore small variation.
424 When the guest is idle real and virtual time will be aligned in
425 the IO wait loop. */
426 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
428 static void icount_adjust(void)
430 int64_t cur_time;
431 int64_t cur_icount;
432 int64_t delta;
434 /* Protected by TimersState mutex. */
435 static int64_t last_delta;
437 /* If the VM is not running, then do nothing. */
438 if (!runstate_is_running()) {
439 return;
442 seqlock_write_lock(&timers_state.vm_clock_seqlock,
443 &timers_state.vm_clock_lock);
444 cur_time = cpu_get_clock_locked();
445 cur_icount = cpu_get_icount_locked();
447 delta = cur_icount - cur_time;
448 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
449 if (delta > 0
450 && last_delta + ICOUNT_WOBBLE < delta * 2
451 && timers_state.icount_time_shift > 0) {
452 /* The guest is getting too far ahead. Slow time down. */
453 atomic_set(&timers_state.icount_time_shift,
454 timers_state.icount_time_shift - 1);
456 if (delta < 0
457 && last_delta - ICOUNT_WOBBLE > delta * 2
458 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
459 /* The guest is getting too far behind. Speed time up. */
460 atomic_set(&timers_state.icount_time_shift,
461 timers_state.icount_time_shift + 1);
463 last_delta = delta;
464 atomic_set_i64(&timers_state.qemu_icount_bias,
465 cur_icount - (timers_state.qemu_icount
466 << timers_state.icount_time_shift));
467 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
468 &timers_state.vm_clock_lock);
471 static void icount_adjust_rt(void *opaque)
473 timer_mod(timers_state.icount_rt_timer,
474 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
475 icount_adjust();
478 static void icount_adjust_vm(void *opaque)
480 timer_mod(timers_state.icount_vm_timer,
481 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
482 NANOSECONDS_PER_SECOND / 10);
483 icount_adjust();
486 static int64_t qemu_icount_round(int64_t count)
488 int shift = atomic_read(&timers_state.icount_time_shift);
489 return (count + (1 << shift) - 1) >> shift;
492 static void icount_warp_rt(void)
494 unsigned seq;
495 int64_t warp_start;
497 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
498 * changes from -1 to another value, so the race here is okay.
500 do {
501 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
502 warp_start = timers_state.vm_clock_warp_start;
503 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
505 if (warp_start == -1) {
506 return;
509 seqlock_write_lock(&timers_state.vm_clock_seqlock,
510 &timers_state.vm_clock_lock);
511 if (runstate_is_running()) {
512 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
513 cpu_get_clock_locked());
514 int64_t warp_delta;
516 warp_delta = clock - timers_state.vm_clock_warp_start;
517 if (use_icount == 2) {
519 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
520 * far ahead of real time.
522 int64_t cur_icount = cpu_get_icount_locked();
523 int64_t delta = clock - cur_icount;
524 warp_delta = MIN(warp_delta, delta);
526 atomic_set_i64(&timers_state.qemu_icount_bias,
527 timers_state.qemu_icount_bias + warp_delta);
529 timers_state.vm_clock_warp_start = -1;
530 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
531 &timers_state.vm_clock_lock);
533 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
534 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
/* Callback of icount_warp_timer; @opaque is unused. */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
546 void qtest_clock_warp(int64_t dest)
548 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
549 AioContext *aio_context;
550 assert(qtest_enabled());
551 aio_context = qemu_get_aio_context();
552 while (clock < dest) {
553 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
554 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
556 seqlock_write_lock(&timers_state.vm_clock_seqlock,
557 &timers_state.vm_clock_lock);
558 atomic_set_i64(&timers_state.qemu_icount_bias,
559 timers_state.qemu_icount_bias + warp);
560 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
561 &timers_state.vm_clock_lock);
563 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
564 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
565 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
567 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
570 void qemu_start_warp_timer(void)
572 int64_t clock;
573 int64_t deadline;
575 if (!use_icount) {
576 return;
579 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
580 * do not fire, so computing the deadline does not make sense.
582 if (!runstate_is_running()) {
583 return;
586 if (replay_mode != REPLAY_MODE_PLAY) {
587 if (!all_cpu_threads_idle()) {
588 return;
591 if (qtest_enabled()) {
592 /* When testing, qtest commands advance icount. */
593 return;
596 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
597 } else {
598 /* warp clock deterministically in record/replay mode */
599 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
600 /* vCPU is sleeping and warp can't be started.
601 It is probably a race condition: notification sent
602 to vCPU was processed in advance and vCPU went to sleep.
603 Therefore we have to wake it up for doing someting. */
604 if (replay_has_checkpoint()) {
605 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
607 return;
611 /* We want to use the earliest deadline from ALL vm_clocks */
612 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
613 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
614 if (deadline < 0) {
615 static bool notified;
616 if (!icount_sleep && !notified) {
617 warn_report("icount sleep disabled and no active timers");
618 notified = true;
620 return;
623 if (deadline > 0) {
625 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
626 * sleep. Otherwise, the CPU might be waiting for a future timer
627 * interrupt to wake it up, but the interrupt never comes because
628 * the vCPU isn't running any insns and thus doesn't advance the
629 * QEMU_CLOCK_VIRTUAL.
631 if (!icount_sleep) {
633 * We never let VCPUs sleep in no sleep icount mode.
634 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
635 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
636 * It is useful when we want a deterministic execution time,
637 * isolated from host latencies.
639 seqlock_write_lock(&timers_state.vm_clock_seqlock,
640 &timers_state.vm_clock_lock);
641 atomic_set_i64(&timers_state.qemu_icount_bias,
642 timers_state.qemu_icount_bias + deadline);
643 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
644 &timers_state.vm_clock_lock);
645 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
646 } else {
648 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
649 * "real" time, (related to the time left until the next event) has
650 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
651 * This avoids that the warps are visible externally; for example,
652 * you will not be sending network packets continuously instead of
653 * every 100ms.
655 seqlock_write_lock(&timers_state.vm_clock_seqlock,
656 &timers_state.vm_clock_lock);
657 if (timers_state.vm_clock_warp_start == -1
658 || timers_state.vm_clock_warp_start > clock) {
659 timers_state.vm_clock_warp_start = clock;
661 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
662 &timers_state.vm_clock_lock);
663 timer_mod_anticipate(timers_state.icount_warp_timer,
664 clock + deadline);
666 } else if (deadline == 0) {
667 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
671 static void qemu_account_warp_timer(void)
673 if (!use_icount || !icount_sleep) {
674 return;
677 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
678 * do not fire, so computing the deadline does not make sense.
680 if (!runstate_is_running()) {
681 return;
684 /* warp clock deterministically in record/replay mode */
685 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
686 return;
689 timer_del(timers_state.icount_warp_timer);
690 icount_warp_rt();
693 static bool icount_state_needed(void *opaque)
695 return use_icount;
698 static bool warp_timer_state_needed(void *opaque)
700 TimersState *s = opaque;
701 return s->icount_warp_timer != NULL;
704 static bool adjust_timers_state_needed(void *opaque)
706 TimersState *s = opaque;
707 return s->icount_rt_timer != NULL;
711 * Subsection for warp timer migration is optional, because may not be created
713 static const VMStateDescription icount_vmstate_warp_timer = {
714 .name = "timer/icount/warp_timer",
715 .version_id = 1,
716 .minimum_version_id = 1,
717 .needed = warp_timer_state_needed,
718 .fields = (VMStateField[]) {
719 VMSTATE_INT64(vm_clock_warp_start, TimersState),
720 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
721 VMSTATE_END_OF_LIST()
725 static const VMStateDescription icount_vmstate_adjust_timers = {
726 .name = "timer/icount/timers",
727 .version_id = 1,
728 .minimum_version_id = 1,
729 .needed = adjust_timers_state_needed,
730 .fields = (VMStateField[]) {
731 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
732 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
733 VMSTATE_END_OF_LIST()
738 * This is a subsection for icount migration.
740 static const VMStateDescription icount_vmstate_timers = {
741 .name = "timer/icount",
742 .version_id = 1,
743 .minimum_version_id = 1,
744 .needed = icount_state_needed,
745 .fields = (VMStateField[]) {
746 VMSTATE_INT64(qemu_icount_bias, TimersState),
747 VMSTATE_INT64(qemu_icount, TimersState),
748 VMSTATE_END_OF_LIST()
750 .subsections = (const VMStateDescription*[]) {
751 &icount_vmstate_warp_timer,
752 &icount_vmstate_adjust_timers,
753 NULL
757 static const VMStateDescription vmstate_timers = {
758 .name = "timer",
759 .version_id = 2,
760 .minimum_version_id = 1,
761 .fields = (VMStateField[]) {
762 VMSTATE_INT64(cpu_ticks_offset, TimersState),
763 VMSTATE_UNUSED(8),
764 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
765 VMSTATE_END_OF_LIST()
767 .subsections = (const VMStateDescription*[]) {
768 &icount_vmstate_timers,
769 NULL
773 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
775 double pct;
776 double throttle_ratio;
777 long sleeptime_ns;
779 if (!cpu_throttle_get_percentage()) {
780 return;
783 pct = (double)cpu_throttle_get_percentage()/100;
784 throttle_ratio = pct / (1 - pct);
785 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
787 qemu_mutex_unlock_iothread();
788 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
789 qemu_mutex_lock_iothread();
790 atomic_set(&cpu->throttle_thread_scheduled, 0);
793 static void cpu_throttle_timer_tick(void *opaque)
795 CPUState *cpu;
796 double pct;
798 /* Stop the timer if needed */
799 if (!cpu_throttle_get_percentage()) {
800 return;
802 CPU_FOREACH(cpu) {
803 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
804 async_run_on_cpu(cpu, cpu_throttle_thread,
805 RUN_ON_CPU_NULL);
809 pct = (double)cpu_throttle_get_percentage()/100;
810 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
811 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
814 void cpu_throttle_set(int new_throttle_pct)
816 /* Ensure throttle percentage is within valid range */
817 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
818 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
820 atomic_set(&throttle_percentage, new_throttle_pct);
822 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
823 CPU_THROTTLE_TIMESLICE_NS);
826 void cpu_throttle_stop(void)
828 atomic_set(&throttle_percentage, 0);
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
836 int cpu_throttle_get_percentage(void)
838 return atomic_read(&throttle_percentage);
841 void cpu_ticks_init(void)
843 seqlock_init(&timers_state.vm_clock_seqlock);
844 qemu_spin_init(&timers_state.vm_clock_lock);
845 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
846 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
847 cpu_throttle_timer_tick, NULL);
850 void configure_icount(QemuOpts *opts, Error **errp)
852 const char *option;
853 char *rem_str = NULL;
855 option = qemu_opt_get(opts, "shift");
856 if (!option) {
857 if (qemu_opt_get(opts, "align") != NULL) {
858 error_setg(errp, "Please specify shift option when using align");
860 return;
863 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
864 if (icount_sleep) {
865 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
866 icount_timer_cb, NULL);
869 icount_align_option = qemu_opt_get_bool(opts, "align", false);
871 if (icount_align_option && !icount_sleep) {
872 error_setg(errp, "align=on and sleep=off are incompatible");
874 if (strcmp(option, "auto") != 0) {
875 errno = 0;
876 timers_state.icount_time_shift = strtol(option, &rem_str, 0);
877 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
878 error_setg(errp, "icount: Invalid shift value");
880 use_icount = 1;
881 return;
882 } else if (icount_align_option) {
883 error_setg(errp, "shift=auto and align=on are incompatible");
884 } else if (!icount_sleep) {
885 error_setg(errp, "shift=auto and sleep=off are incompatible");
888 use_icount = 2;
890 /* 125MIPS seems a reasonable initial guess at the guest speed.
891 It will be corrected fairly quickly anyway. */
892 timers_state.icount_time_shift = 3;
894 /* Have both realtime and virtual time triggers for speed adjustment.
895 The realtime trigger catches emulated time passing too slowly,
896 the virtual time trigger catches emulated time passing too fast.
897 Realtime triggers occur even when idle, so use them less frequently
898 than VM triggers. */
899 timers_state.vm_clock_warp_start = -1;
900 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
901 icount_adjust_rt, NULL);
902 timer_mod(timers_state.icount_rt_timer,
903 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
904 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
905 icount_adjust_vm, NULL);
906 timer_mod(timers_state.icount_vm_timer,
907 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
908 NANOSECONDS_PER_SECOND / 10);
911 /***********************************************************/
912 /* TCG vCPU kick timer
914 * The kick timer is responsible for moving single threaded vCPU
915 * emulation on to the next vCPU. If more than one vCPU is running a
916 * timer event with force a cpu->exit so the next vCPU can get
917 * scheduled.
919 * The timer is removed if all vCPUs are idle and restarted again once
920 * idleness is complete.
923 static QEMUTimer *tcg_kick_vcpu_timer;
924 static CPUState *tcg_current_rr_cpu;
926 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
928 static inline int64_t qemu_tcg_next_kick(void)
930 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
933 /* Kick the currently round-robin scheduled vCPU */
934 static void qemu_cpu_kick_rr_cpu(void)
936 CPUState *cpu;
937 do {
938 cpu = atomic_mb_read(&tcg_current_rr_cpu);
939 if (cpu) {
940 cpu_exit(cpu);
942 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
945 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
949 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
951 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
952 qemu_notify_event();
953 return;
956 if (qemu_in_vcpu_thread()) {
957 /* A CPU is currently running; kick it back out to the
958 * tcg_cpu_exec() loop so it will recalculate its
959 * icount deadline immediately.
961 qemu_cpu_kick(current_cpu);
962 } else if (first_cpu) {
963 /* qemu_cpu_kick is not enough to kick a halted CPU out of
964 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
965 * causes cpu_thread_is_idle to return false. This way,
966 * handle_icount_deadline can run.
967 * If we have no CPUs at all for some reason, we don't
968 * need to do anything.
970 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
974 static void kick_tcg_thread(void *opaque)
976 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
977 qemu_cpu_kick_rr_cpu();
980 static void start_tcg_kick_timer(void)
982 assert(!mttcg_enabled);
983 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
984 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
985 kick_tcg_thread, NULL);
987 if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
988 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
992 static void stop_tcg_kick_timer(void)
994 assert(!mttcg_enabled);
995 if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
996 timer_del(tcg_kick_vcpu_timer);
1000 /***********************************************************/
1001 void hw_error(const char *fmt, ...)
1003 va_list ap;
1004 CPUState *cpu;
1006 va_start(ap, fmt);
1007 fprintf(stderr, "qemu: hardware error: ");
1008 vfprintf(stderr, fmt, ap);
1009 fprintf(stderr, "\n");
1010 CPU_FOREACH(cpu) {
1011 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
1012 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
1014 va_end(ap);
1015 abort();
1018 void cpu_synchronize_all_states(void)
1020 CPUState *cpu;
1022 CPU_FOREACH(cpu) {
1023 cpu_synchronize_state(cpu);
1024 /* TODO: move to cpu_synchronize_state() */
1025 if (hvf_enabled()) {
1026 hvf_cpu_synchronize_state(cpu);
1031 void cpu_synchronize_all_post_reset(void)
1033 CPUState *cpu;
1035 CPU_FOREACH(cpu) {
1036 cpu_synchronize_post_reset(cpu);
1037 /* TODO: move to cpu_synchronize_post_reset() */
1038 if (hvf_enabled()) {
1039 hvf_cpu_synchronize_post_reset(cpu);
1044 void cpu_synchronize_all_post_init(void)
1046 CPUState *cpu;
1048 CPU_FOREACH(cpu) {
1049 cpu_synchronize_post_init(cpu);
1050 /* TODO: move to cpu_synchronize_post_init() */
1051 if (hvf_enabled()) {
1052 hvf_cpu_synchronize_post_init(cpu);
1057 void cpu_synchronize_all_pre_loadvm(void)
1059 CPUState *cpu;
1061 CPU_FOREACH(cpu) {
1062 cpu_synchronize_pre_loadvm(cpu);
1066 static int do_vm_stop(RunState state, bool send_stop)
1068 int ret = 0;
1070 if (runstate_is_running()) {
1071 cpu_disable_ticks();
1072 pause_all_vcpus();
1073 runstate_set(state);
1074 vm_state_notify(0, state);
1075 if (send_stop) {
1076 qapi_event_send_stop();
1080 bdrv_drain_all();
1081 replay_disable_events();
1082 ret = bdrv_flush_all();
1084 return ret;
1087 /* Special vm_stop() variant for terminating the process. Historically clients
1088 * did not expect a QMP STOP event and so we need to retain compatibility.
1090 int vm_shutdown(void)
1092 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
1095 static bool cpu_can_run(CPUState *cpu)
1097 if (cpu->stop) {
1098 return false;
1100 if (cpu_is_stopped(cpu)) {
1101 return false;
1103 return true;
1106 static void cpu_handle_guest_debug(CPUState *cpu)
1108 gdb_set_stop_cpu(cpu);
1109 qemu_system_debug_request();
1110 cpu->stopped = true;
1113 #ifdef CONFIG_LINUX
1114 static void sigbus_reraise(void)
1116 sigset_t set;
1117 struct sigaction action;
1119 memset(&action, 0, sizeof(action));
1120 action.sa_handler = SIG_DFL;
1121 if (!sigaction(SIGBUS, &action, NULL)) {
1122 raise(SIGBUS);
1123 sigemptyset(&set);
1124 sigaddset(&set, SIGBUS);
1125 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1127 perror("Failed to re-raise SIGBUS!\n");
1128 abort();
1131 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1133 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1134 sigbus_reraise();
1137 if (current_cpu) {
1138 /* Called asynchronously in VCPU thread. */
1139 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1140 sigbus_reraise();
1142 } else {
1143 /* Called synchronously (via signalfd) in main thread. */
1144 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1145 sigbus_reraise();
1150 static void qemu_init_sigbus(void)
1152 struct sigaction action;
1154 memset(&action, 0, sizeof(action));
1155 action.sa_flags = SA_SIGINFO;
1156 action.sa_sigaction = sigbus_handler;
1157 sigaction(SIGBUS, &action, NULL);
1159 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1161 #else /* !CONFIG_LINUX */
/* No SIGBUS setup is done when CONFIG_LINUX is not defined. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
1165 #endif /* !CONFIG_LINUX */
1167 static QemuMutex qemu_global_mutex;
1169 static QemuThread io_thread;
1171 /* cpu creation */
1172 static QemuCond qemu_cpu_cond;
1173 /* system init */
1174 static QemuCond qemu_pause_cond;
1176 void qemu_init_cpu_loop(void)
1178 qemu_init_sigbus();
1179 qemu_cond_init(&qemu_cpu_cond);
1180 qemu_cond_init(&qemu_pause_cond);
1181 qemu_mutex_init(&qemu_global_mutex);
1183 qemu_thread_get_self(&io_thread);
1186 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1188 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1191 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1193 if (kvm_destroy_vcpu(cpu) < 0) {
1194 error_report("kvm_destroy_vcpu failed");
1195 exit(EXIT_FAILURE);
1199 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1203 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1205 g_assert(qemu_cpu_is_self(cpu));
1206 cpu->stop = false;
1207 cpu->stopped = true;
1208 if (exit) {
1209 cpu_exit(cpu);
1211 qemu_cond_broadcast(&qemu_pause_cond);
1214 static void qemu_wait_io_event_common(CPUState *cpu)
1216 atomic_mb_set(&cpu->thread_kicked, false);
1217 if (cpu->stop) {
1218 qemu_cpu_stop(cpu, false);
1220 process_queued_cpu_work(cpu);
1223 static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
1225 while (all_cpu_threads_idle()) {
1226 stop_tcg_kick_timer();
1227 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1230 start_tcg_kick_timer();
1232 qemu_wait_io_event_common(cpu);
1235 static void qemu_wait_io_event(CPUState *cpu)
1237 while (cpu_thread_is_idle(cpu)) {
1238 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1241 #ifdef _WIN32
1242 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1243 if (!tcg_enabled()) {
1244 SleepEx(0, TRUE);
1246 #endif
1247 qemu_wait_io_event_common(cpu);
1250 static void *qemu_kvm_cpu_thread_fn(void *arg)
1252 CPUState *cpu = arg;
1253 int r;
1255 rcu_register_thread();
1257 qemu_mutex_lock_iothread();
1258 qemu_thread_get_self(cpu->thread);
1259 cpu->thread_id = qemu_get_thread_id();
1260 cpu->can_do_io = 1;
1261 current_cpu = cpu;
1263 r = kvm_init_vcpu(cpu);
1264 if (r < 0) {
1265 error_report("kvm_init_vcpu failed: %s", strerror(-r));
1266 exit(1);
1269 kvm_init_cpu_signals(cpu);
1271 /* signal CPU creation */
1272 cpu->created = true;
1273 qemu_cond_signal(&qemu_cpu_cond);
1275 do {
1276 if (cpu_can_run(cpu)) {
1277 r = kvm_cpu_exec(cpu);
1278 if (r == EXCP_DEBUG) {
1279 cpu_handle_guest_debug(cpu);
1282 qemu_wait_io_event(cpu);
1283 } while (!cpu->unplug || cpu_can_run(cpu));
1285 qemu_kvm_destroy_vcpu(cpu);
1286 cpu->created = false;
1287 qemu_cond_signal(&qemu_cpu_cond);
1288 qemu_mutex_unlock_iothread();
1289 rcu_unregister_thread();
1290 return NULL;
1293 static void *qemu_dummy_cpu_thread_fn(void *arg)
1295 #ifdef _WIN32
1296 error_report("qtest is not supported under Windows");
1297 exit(1);
1298 #else
1299 CPUState *cpu = arg;
1300 sigset_t waitset;
1301 int r;
1303 rcu_register_thread();
1305 qemu_mutex_lock_iothread();
1306 qemu_thread_get_self(cpu->thread);
1307 cpu->thread_id = qemu_get_thread_id();
1308 cpu->can_do_io = 1;
1309 current_cpu = cpu;
1311 sigemptyset(&waitset);
1312 sigaddset(&waitset, SIG_IPI);
1314 /* signal CPU creation */
1315 cpu->created = true;
1316 qemu_cond_signal(&qemu_cpu_cond);
1318 do {
1319 qemu_mutex_unlock_iothread();
1320 do {
1321 int sig;
1322 r = sigwait(&waitset, &sig);
1323 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1324 if (r == -1) {
1325 perror("sigwait");
1326 exit(1);
1328 qemu_mutex_lock_iothread();
1329 qemu_wait_io_event(cpu);
1330 } while (!cpu->unplug);
1332 rcu_unregister_thread();
1333 return NULL;
1334 #endif
1337 static int64_t tcg_get_icount_limit(void)
1339 int64_t deadline;
1341 if (replay_mode != REPLAY_MODE_PLAY) {
1342 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1344 /* Maintain prior (possibly buggy) behaviour where if no deadline
1345 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1346 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1347 * nanoseconds.
1349 if ((deadline < 0) || (deadline > INT32_MAX)) {
1350 deadline = INT32_MAX;
1353 return qemu_icount_round(deadline);
1354 } else {
1355 return replay_get_instructions();
1359 static void handle_icount_deadline(void)
1361 assert(qemu_in_vcpu_thread());
1362 if (use_icount) {
1363 int64_t deadline =
1364 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1366 if (deadline == 0) {
1367 /* Wake up other AioContexts. */
1368 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1369 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1374 static void prepare_icount_for_run(CPUState *cpu)
1376 if (use_icount) {
1377 int insns_left;
1379 /* These should always be cleared by process_icount_data after
1380 * each vCPU execution. However u16.high can be raised
1381 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1383 g_assert(cpu->icount_decr.u16.low == 0);
1384 g_assert(cpu->icount_extra == 0);
1386 cpu->icount_budget = tcg_get_icount_limit();
1387 insns_left = MIN(0xffff, cpu->icount_budget);
1388 cpu->icount_decr.u16.low = insns_left;
1389 cpu->icount_extra = cpu->icount_budget - insns_left;
1391 replay_mutex_lock();
1395 static void process_icount_data(CPUState *cpu)
1397 if (use_icount) {
1398 /* Account for executed instructions */
1399 cpu_update_icount(cpu);
1401 /* Reset the counters */
1402 cpu->icount_decr.u16.low = 0;
1403 cpu->icount_extra = 0;
1404 cpu->icount_budget = 0;
1406 replay_account_executed_instructions();
1408 replay_mutex_unlock();
1413 static int tcg_cpu_exec(CPUState *cpu)
1415 int ret;
1416 #ifdef CONFIG_PROFILER
1417 int64_t ti;
1418 #endif
1420 assert(tcg_enabled());
1421 #ifdef CONFIG_PROFILER
1422 ti = profile_getclock();
1423 #endif
1424 cpu_exec_start(cpu);
1425 ret = cpu_exec(cpu);
1426 cpu_exec_end(cpu);
1427 #ifdef CONFIG_PROFILER
1428 atomic_set(&tcg_ctx->prof.cpu_exec_time,
1429 tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
1430 #endif
1431 return ret;
1434 /* Destroy any remaining vCPUs which have been unplugged and have
1435 * finished running
1437 static void deal_with_unplugged_cpus(void)
1439 CPUState *cpu;
1441 CPU_FOREACH(cpu) {
1442 if (cpu->unplug && !cpu_can_run(cpu)) {
1443 qemu_tcg_destroy_vcpu(cpu);
1444 cpu->created = false;
1445 qemu_cond_signal(&qemu_cpu_cond);
1446 break;
1451 /* Single-threaded TCG
1453 * In the single-threaded case each vCPU is simulated in turn. If
1454 * there is more than a single vCPU we create a simple timer to kick
1455 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1456 * This is done explicitly rather than relying on side-effects
1457 * elsewhere.
1460 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1462 CPUState *cpu = arg;
1464 assert(tcg_enabled());
1465 rcu_register_thread();
1466 tcg_register_thread();
1468 qemu_mutex_lock_iothread();
1469 qemu_thread_get_self(cpu->thread);
1471 cpu->thread_id = qemu_get_thread_id();
1472 cpu->created = true;
1473 cpu->can_do_io = 1;
1474 qemu_cond_signal(&qemu_cpu_cond);
1476 /* wait for initial kick-off after machine start */
1477 while (first_cpu->stopped) {
1478 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1480 /* process any pending work */
1481 CPU_FOREACH(cpu) {
1482 current_cpu = cpu;
1483 qemu_wait_io_event_common(cpu);
1487 start_tcg_kick_timer();
1489 cpu = first_cpu;
1491 /* process any pending work */
1492 cpu->exit_request = 1;
1494 while (1) {
1495 qemu_mutex_unlock_iothread();
1496 replay_mutex_lock();
1497 qemu_mutex_lock_iothread();
1498 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1499 qemu_account_warp_timer();
1501 /* Run the timers here. This is much more efficient than
1502 * waking up the I/O thread and waiting for completion.
1504 handle_icount_deadline();
1506 replay_mutex_unlock();
1508 if (!cpu) {
1509 cpu = first_cpu;
1512 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1514 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1515 current_cpu = cpu;
1517 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1518 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1520 if (cpu_can_run(cpu)) {
1521 int r;
1523 qemu_mutex_unlock_iothread();
1524 prepare_icount_for_run(cpu);
1526 r = tcg_cpu_exec(cpu);
1528 process_icount_data(cpu);
1529 qemu_mutex_lock_iothread();
1531 if (r == EXCP_DEBUG) {
1532 cpu_handle_guest_debug(cpu);
1533 break;
1534 } else if (r == EXCP_ATOMIC) {
1535 qemu_mutex_unlock_iothread();
1536 cpu_exec_step_atomic(cpu);
1537 qemu_mutex_lock_iothread();
1538 break;
1540 } else if (cpu->stop) {
1541 if (cpu->unplug) {
1542 cpu = CPU_NEXT(cpu);
1544 break;
1547 cpu = CPU_NEXT(cpu);
1548 } /* while (cpu && !cpu->exit_request).. */
1550 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1551 atomic_set(&tcg_current_rr_cpu, NULL);
1553 if (cpu && cpu->exit_request) {
1554 atomic_mb_set(&cpu->exit_request, 0);
1557 if (use_icount && all_cpu_threads_idle()) {
1559 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1560 * in the main_loop, wake it up in order to start the warp timer.
1562 qemu_notify_event();
1565 qemu_tcg_rr_wait_io_event(cpu ? cpu : first_cpu);
1566 deal_with_unplugged_cpus();
1569 rcu_unregister_thread();
1570 return NULL;
1573 static void *qemu_hax_cpu_thread_fn(void *arg)
1575 CPUState *cpu = arg;
1576 int r;
1578 rcu_register_thread();
1579 qemu_mutex_lock_iothread();
1580 qemu_thread_get_self(cpu->thread);
1582 cpu->thread_id = qemu_get_thread_id();
1583 cpu->created = true;
1584 cpu->halted = 0;
1585 current_cpu = cpu;
1587 hax_init_vcpu(cpu);
1588 qemu_cond_signal(&qemu_cpu_cond);
1590 do {
1591 if (cpu_can_run(cpu)) {
1592 r = hax_smp_cpu_exec(cpu);
1593 if (r == EXCP_DEBUG) {
1594 cpu_handle_guest_debug(cpu);
1598 qemu_wait_io_event(cpu);
1599 } while (!cpu->unplug || cpu_can_run(cpu));
1600 rcu_unregister_thread();
1601 return NULL;
1604 /* The HVF-specific vCPU thread function. This one should only run when the host
1605 * CPU supports the VMX "unrestricted guest" feature. */
1606 static void *qemu_hvf_cpu_thread_fn(void *arg)
1608 CPUState *cpu = arg;
1610 int r;
1612 assert(hvf_enabled());
1614 rcu_register_thread();
1616 qemu_mutex_lock_iothread();
1617 qemu_thread_get_self(cpu->thread);
1619 cpu->thread_id = qemu_get_thread_id();
1620 cpu->can_do_io = 1;
1621 current_cpu = cpu;
1623 hvf_init_vcpu(cpu);
1625 /* signal CPU creation */
1626 cpu->created = true;
1627 qemu_cond_signal(&qemu_cpu_cond);
1629 do {
1630 if (cpu_can_run(cpu)) {
1631 r = hvf_vcpu_exec(cpu);
1632 if (r == EXCP_DEBUG) {
1633 cpu_handle_guest_debug(cpu);
1636 qemu_wait_io_event(cpu);
1637 } while (!cpu->unplug || cpu_can_run(cpu));
1639 hvf_vcpu_destroy(cpu);
1640 cpu->created = false;
1641 qemu_cond_signal(&qemu_cpu_cond);
1642 qemu_mutex_unlock_iothread();
1643 rcu_unregister_thread();
1644 return NULL;
1647 static void *qemu_whpx_cpu_thread_fn(void *arg)
1649 CPUState *cpu = arg;
1650 int r;
1652 rcu_register_thread();
1654 qemu_mutex_lock_iothread();
1655 qemu_thread_get_self(cpu->thread);
1656 cpu->thread_id = qemu_get_thread_id();
1657 current_cpu = cpu;
1659 r = whpx_init_vcpu(cpu);
1660 if (r < 0) {
1661 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1662 exit(1);
1665 /* signal CPU creation */
1666 cpu->created = true;
1667 qemu_cond_signal(&qemu_cpu_cond);
1669 do {
1670 if (cpu_can_run(cpu)) {
1671 r = whpx_vcpu_exec(cpu);
1672 if (r == EXCP_DEBUG) {
1673 cpu_handle_guest_debug(cpu);
1676 while (cpu_thread_is_idle(cpu)) {
1677 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1679 qemu_wait_io_event_common(cpu);
1680 } while (!cpu->unplug || cpu_can_run(cpu));
1682 whpx_destroy_vcpu(cpu);
1683 cpu->created = false;
1684 qemu_cond_signal(&qemu_cpu_cond);
1685 qemu_mutex_unlock_iothread();
1686 rcu_unregister_thread();
1687 return NULL;
1690 #ifdef _WIN32
1691 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1694 #endif
1696 /* Multi-threaded TCG
1698 * In the multi-threaded case each vCPU has its own thread. The TLS
1699 * variable current_cpu can be used deep in the code to find the
1700 * current CPUState for a given thread.
1703 static void *qemu_tcg_cpu_thread_fn(void *arg)
1705 CPUState *cpu = arg;
1707 assert(tcg_enabled());
1708 g_assert(!use_icount);
1710 rcu_register_thread();
1711 tcg_register_thread();
1713 qemu_mutex_lock_iothread();
1714 qemu_thread_get_self(cpu->thread);
1716 cpu->thread_id = qemu_get_thread_id();
1717 cpu->created = true;
1718 cpu->can_do_io = 1;
1719 current_cpu = cpu;
1720 qemu_cond_signal(&qemu_cpu_cond);
1722 /* process any pending work */
1723 cpu->exit_request = 1;
1725 do {
1726 if (cpu_can_run(cpu)) {
1727 int r;
1728 qemu_mutex_unlock_iothread();
1729 r = tcg_cpu_exec(cpu);
1730 qemu_mutex_lock_iothread();
1731 switch (r) {
1732 case EXCP_DEBUG:
1733 cpu_handle_guest_debug(cpu);
1734 break;
1735 case EXCP_HALTED:
1736 /* during start-up the vCPU is reset and the thread is
1737 * kicked several times. If we don't ensure we go back
1738 * to sleep in the halted state we won't cleanly
1739 * start-up when the vCPU is enabled.
1741 * cpu->halted should ensure we sleep in wait_io_event
1743 g_assert(cpu->halted);
1744 break;
1745 case EXCP_ATOMIC:
1746 qemu_mutex_unlock_iothread();
1747 cpu_exec_step_atomic(cpu);
1748 qemu_mutex_lock_iothread();
1749 default:
1750 /* Ignore everything else? */
1751 break;
1755 atomic_mb_set(&cpu->exit_request, 0);
1756 qemu_wait_io_event(cpu);
1757 } while (!cpu->unplug || cpu_can_run(cpu));
1759 qemu_tcg_destroy_vcpu(cpu);
1760 cpu->created = false;
1761 qemu_cond_signal(&qemu_cpu_cond);
1762 qemu_mutex_unlock_iothread();
1763 rcu_unregister_thread();
1764 return NULL;
1767 static void qemu_cpu_kick_thread(CPUState *cpu)
1769 #ifndef _WIN32
1770 int err;
1772 if (cpu->thread_kicked) {
1773 return;
1775 cpu->thread_kicked = true;
1776 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1777 if (err) {
1778 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1779 exit(1);
1781 #else /* _WIN32 */
1782 if (!qemu_cpu_is_self(cpu)) {
1783 if (whpx_enabled()) {
1784 whpx_vcpu_kick(cpu);
1785 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1786 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1787 __func__, GetLastError());
1788 exit(1);
1791 #endif
1794 void qemu_cpu_kick(CPUState *cpu)
1796 qemu_cond_broadcast(cpu->halt_cond);
1797 if (tcg_enabled()) {
1798 cpu_exit(cpu);
1799 /* NOP unless doing single-thread RR */
1800 qemu_cpu_kick_rr_cpu();
1801 } else {
1802 if (hax_enabled()) {
1804 * FIXME: race condition with the exit_request check in
1805 * hax_vcpu_hax_exec
1807 cpu->exit_request = 1;
1809 qemu_cpu_kick_thread(cpu);
1813 void qemu_cpu_kick_self(void)
1815 assert(current_cpu);
1816 qemu_cpu_kick_thread(current_cpu);
1819 bool qemu_cpu_is_self(CPUState *cpu)
1821 return qemu_thread_is_self(cpu->thread);
1824 bool qemu_in_vcpu_thread(void)
1826 return current_cpu && qemu_cpu_is_self(current_cpu);
1829 static __thread bool iothread_locked = false;
1831 bool qemu_mutex_iothread_locked(void)
1833 return iothread_locked;
1837 * The BQL is taken from so many places that it is worth profiling the
1838 * callers directly, instead of funneling them all through a single function.
1840 void qemu_mutex_lock_iothread_impl(const char *file, int line)
1842 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1844 g_assert(!qemu_mutex_iothread_locked());
1845 bql_lock(&qemu_global_mutex, file, line);
1846 iothread_locked = true;
1849 void qemu_mutex_unlock_iothread(void)
1851 g_assert(qemu_mutex_iothread_locked());
1852 iothread_locked = false;
1853 qemu_mutex_unlock(&qemu_global_mutex);
1856 static bool all_vcpus_paused(void)
1858 CPUState *cpu;
1860 CPU_FOREACH(cpu) {
1861 if (!cpu->stopped) {
1862 return false;
1866 return true;
1869 void pause_all_vcpus(void)
1871 CPUState *cpu;
1873 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1874 CPU_FOREACH(cpu) {
1875 if (qemu_cpu_is_self(cpu)) {
1876 qemu_cpu_stop(cpu, true);
1877 } else {
1878 cpu->stop = true;
1879 qemu_cpu_kick(cpu);
1883 /* We need to drop the replay_lock so any vCPU threads woken up
1884 * can finish their replay tasks
1886 replay_mutex_unlock();
1888 while (!all_vcpus_paused()) {
1889 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1890 CPU_FOREACH(cpu) {
1891 qemu_cpu_kick(cpu);
1895 qemu_mutex_unlock_iothread();
1896 replay_mutex_lock();
1897 qemu_mutex_lock_iothread();
1900 void cpu_resume(CPUState *cpu)
1902 cpu->stop = false;
1903 cpu->stopped = false;
1904 qemu_cpu_kick(cpu);
1907 void resume_all_vcpus(void)
1909 CPUState *cpu;
1911 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1912 CPU_FOREACH(cpu) {
1913 cpu_resume(cpu);
1917 void cpu_remove_sync(CPUState *cpu)
1919 cpu->stop = true;
1920 cpu->unplug = true;
1921 qemu_cpu_kick(cpu);
1922 qemu_mutex_unlock_iothread();
1923 qemu_thread_join(cpu->thread);
1924 qemu_mutex_lock_iothread();
1927 /* For temporary buffers for forming a name */
1928 #define VCPU_THREAD_NAME_SIZE 16
1930 static void qemu_tcg_init_vcpu(CPUState *cpu)
1932 char thread_name[VCPU_THREAD_NAME_SIZE];
1933 static QemuCond *single_tcg_halt_cond;
1934 static QemuThread *single_tcg_cpu_thread;
1935 static int tcg_region_inited;
1937 assert(tcg_enabled());
1939 * Initialize TCG regions--once. Now is a good time, because:
1940 * (1) TCG's init context, prologue and target globals have been set up.
1941 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1942 * -accel flag is processed, so the check doesn't work then).
1944 if (!tcg_region_inited) {
1945 tcg_region_inited = 1;
1946 tcg_region_init();
1949 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1950 cpu->thread = g_malloc0(sizeof(QemuThread));
1951 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1952 qemu_cond_init(cpu->halt_cond);
1954 if (qemu_tcg_mttcg_enabled()) {
1955 /* create a thread per vCPU with TCG (MTTCG) */
1956 parallel_cpus = true;
1957 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1958 cpu->cpu_index);
1960 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1961 cpu, QEMU_THREAD_JOINABLE);
1963 } else {
1964 /* share a single thread for all cpus with TCG */
1965 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1966 qemu_thread_create(cpu->thread, thread_name,
1967 qemu_tcg_rr_cpu_thread_fn,
1968 cpu, QEMU_THREAD_JOINABLE);
1970 single_tcg_halt_cond = cpu->halt_cond;
1971 single_tcg_cpu_thread = cpu->thread;
1973 #ifdef _WIN32
1974 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1975 #endif
1976 } else {
1977 /* For non-MTTCG cases we share the thread */
1978 cpu->thread = single_tcg_cpu_thread;
1979 cpu->halt_cond = single_tcg_halt_cond;
1980 cpu->thread_id = first_cpu->thread_id;
1981 cpu->can_do_io = 1;
1982 cpu->created = true;
1986 static void qemu_hax_start_vcpu(CPUState *cpu)
1988 char thread_name[VCPU_THREAD_NAME_SIZE];
1990 cpu->thread = g_malloc0(sizeof(QemuThread));
1991 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1992 qemu_cond_init(cpu->halt_cond);
1994 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1995 cpu->cpu_index);
1996 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1997 cpu, QEMU_THREAD_JOINABLE);
1998 #ifdef _WIN32
1999 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2000 #endif
2003 static void qemu_kvm_start_vcpu(CPUState *cpu)
2005 char thread_name[VCPU_THREAD_NAME_SIZE];
2007 cpu->thread = g_malloc0(sizeof(QemuThread));
2008 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2009 qemu_cond_init(cpu->halt_cond);
2010 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2011 cpu->cpu_index);
2012 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
2013 cpu, QEMU_THREAD_JOINABLE);
2016 static void qemu_hvf_start_vcpu(CPUState *cpu)
2018 char thread_name[VCPU_THREAD_NAME_SIZE];
2020 /* HVF currently does not support TCG, and only runs in
2021 * unrestricted-guest mode. */
2022 assert(hvf_enabled());
2024 cpu->thread = g_malloc0(sizeof(QemuThread));
2025 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2026 qemu_cond_init(cpu->halt_cond);
2028 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2029 cpu->cpu_index);
2030 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2031 cpu, QEMU_THREAD_JOINABLE);
2034 static void qemu_whpx_start_vcpu(CPUState *cpu)
2036 char thread_name[VCPU_THREAD_NAME_SIZE];
2038 cpu->thread = g_malloc0(sizeof(QemuThread));
2039 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2040 qemu_cond_init(cpu->halt_cond);
2041 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2042 cpu->cpu_index);
2043 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2044 cpu, QEMU_THREAD_JOINABLE);
2045 #ifdef _WIN32
2046 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2047 #endif
2050 static void qemu_dummy_start_vcpu(CPUState *cpu)
2052 char thread_name[VCPU_THREAD_NAME_SIZE];
2054 cpu->thread = g_malloc0(sizeof(QemuThread));
2055 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2056 qemu_cond_init(cpu->halt_cond);
2057 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2058 cpu->cpu_index);
2059 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2060 QEMU_THREAD_JOINABLE);
2063 void qemu_init_vcpu(CPUState *cpu)
2065 cpu->nr_cores = smp_cores;
2066 cpu->nr_threads = smp_threads;
2067 cpu->stopped = true;
2069 if (!cpu->as) {
2070 /* If the target cpu hasn't set up any address spaces itself,
2071 * give it the default one.
2073 cpu->num_ases = 1;
2074 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2077 if (kvm_enabled()) {
2078 qemu_kvm_start_vcpu(cpu);
2079 } else if (hax_enabled()) {
2080 qemu_hax_start_vcpu(cpu);
2081 } else if (hvf_enabled()) {
2082 qemu_hvf_start_vcpu(cpu);
2083 } else if (tcg_enabled()) {
2084 qemu_tcg_init_vcpu(cpu);
2085 } else if (whpx_enabled()) {
2086 qemu_whpx_start_vcpu(cpu);
2087 } else {
2088 qemu_dummy_start_vcpu(cpu);
2091 while (!cpu->created) {
2092 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2096 void cpu_stop_current(void)
2098 if (current_cpu) {
2099 qemu_cpu_stop(current_cpu, true);
2103 int vm_stop(RunState state)
2105 if (qemu_in_vcpu_thread()) {
2106 qemu_system_vmstop_request_prepare();
2107 qemu_system_vmstop_request(state);
2109 * FIXME: should not return to device code in case
2110 * vm_stop() has been requested.
2112 cpu_stop_current();
2113 return 0;
2116 return do_vm_stop(state, true);
2120 * Prepare for (re)starting the VM.
2121 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2122 * running or in case of an error condition), 0 otherwise.
2124 int vm_prepare_start(void)
2126 RunState requested;
2128 qemu_vmstop_requested(&requested);
2129 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2130 return -1;
2133 /* Ensure that a STOP/RESUME pair of events is emitted if a
2134 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2135 * example, according to documentation is always followed by
2136 * the STOP event.
2138 if (runstate_is_running()) {
2139 qapi_event_send_stop();
2140 qapi_event_send_resume();
2141 return -1;
2144 /* We are sending this now, but the CPUs will be resumed shortly later */
2145 qapi_event_send_resume();
2147 replay_enable_events();
2148 cpu_enable_ticks();
2149 runstate_set(RUN_STATE_RUNNING);
2150 vm_state_notify(1, RUN_STATE_RUNNING);
2151 return 0;
/* Start the VM: resume the vCPUs only when vm_prepare_start() returns 0. */
void vm_start(void)
{
    if (vm_prepare_start() != 0) {
        return;
    }

    resume_all_vcpus();
}
2161 /* does a state transition even if the VM is already stopped,
2162 current state is forgotten forever */
2163 int vm_stop_force_state(RunState state)
2165 if (runstate_is_running()) {
2166 return vm_stop(state);
2167 } else {
2168 runstate_set(state);
2170 bdrv_drain_all();
2171 /* Make sure to return an error if the flush in a previous vm_stop()
2172 * failed. */
2173 return bdrv_flush_all();
2177 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
2179 /* XXX: implement xxx_cpu_list for targets that still miss it */
2180 #if defined(cpu_list)
2181 cpu_list(f, cpu_fprintf);
2182 #endif
2185 CpuInfoList *qmp_query_cpus(Error **errp)
2187 MachineState *ms = MACHINE(qdev_get_machine());
2188 MachineClass *mc = MACHINE_GET_CLASS(ms);
2189 CpuInfoList *head = NULL, *cur_item = NULL;
2190 CPUState *cpu;
2192 CPU_FOREACH(cpu) {
2193 CpuInfoList *info;
2194 #if defined(TARGET_I386)
2195 X86CPU *x86_cpu = X86_CPU(cpu);
2196 CPUX86State *env = &x86_cpu->env;
2197 #elif defined(TARGET_PPC)
2198 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
2199 CPUPPCState *env = &ppc_cpu->env;
2200 #elif defined(TARGET_SPARC)
2201 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
2202 CPUSPARCState *env = &sparc_cpu->env;
2203 #elif defined(TARGET_RISCV)
2204 RISCVCPU *riscv_cpu = RISCV_CPU(cpu);
2205 CPURISCVState *env = &riscv_cpu->env;
2206 #elif defined(TARGET_MIPS)
2207 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
2208 CPUMIPSState *env = &mips_cpu->env;
2209 #elif defined(TARGET_TRICORE)
2210 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
2211 CPUTriCoreState *env = &tricore_cpu->env;
2212 #elif defined(TARGET_S390X)
2213 S390CPU *s390_cpu = S390_CPU(cpu);
2214 CPUS390XState *env = &s390_cpu->env;
2215 #endif
2217 cpu_synchronize_state(cpu);
2219 info = g_malloc0(sizeof(*info));
2220 info->value = g_malloc0(sizeof(*info->value));
2221 info->value->CPU = cpu->cpu_index;
2222 info->value->current = (cpu == first_cpu);
2223 info->value->halted = cpu->halted;
2224 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2225 info->value->thread_id = cpu->thread_id;
2226 #if defined(TARGET_I386)
2227 info->value->arch = CPU_INFO_ARCH_X86;
2228 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
2229 #elif defined(TARGET_PPC)
2230 info->value->arch = CPU_INFO_ARCH_PPC;
2231 info->value->u.ppc.nip = env->nip;
2232 #elif defined(TARGET_SPARC)
2233 info->value->arch = CPU_INFO_ARCH_SPARC;
2234 info->value->u.q_sparc.pc = env->pc;
2235 info->value->u.q_sparc.npc = env->npc;
2236 #elif defined(TARGET_MIPS)
2237 info->value->arch = CPU_INFO_ARCH_MIPS;
2238 info->value->u.q_mips.PC = env->active_tc.PC;
2239 #elif defined(TARGET_TRICORE)
2240 info->value->arch = CPU_INFO_ARCH_TRICORE;
2241 info->value->u.tricore.PC = env->PC;
2242 #elif defined(TARGET_S390X)
2243 info->value->arch = CPU_INFO_ARCH_S390;
2244 info->value->u.s390.cpu_state = env->cpu_state;
2245 #elif defined(TARGET_RISCV)
2246 info->value->arch = CPU_INFO_ARCH_RISCV;
2247 info->value->u.riscv.pc = env->pc;
2248 #else
2249 info->value->arch = CPU_INFO_ARCH_OTHER;
2250 #endif
2251 info->value->has_props = !!mc->cpu_index_to_instance_props;
2252 if (info->value->has_props) {
2253 CpuInstanceProperties *props;
2254 props = g_malloc0(sizeof(*props));
2255 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2256 info->value->props = props;
2259 /* XXX: waiting for the qapi to support GSList */
2260 if (!cur_item) {
2261 head = cur_item = info;
2262 } else {
2263 cur_item->next = info;
2264 cur_item = info;
2268 return head;
2271 static CpuInfoArch sysemu_target_to_cpuinfo_arch(SysEmuTarget target)
2274 * The @SysEmuTarget -> @CpuInfoArch mapping below is based on the
2275 * TARGET_ARCH -> TARGET_BASE_ARCH mapping in the "configure" script.
2277 switch (target) {
2278 case SYS_EMU_TARGET_I386:
2279 case SYS_EMU_TARGET_X86_64:
2280 return CPU_INFO_ARCH_X86;
2282 case SYS_EMU_TARGET_PPC:
2283 case SYS_EMU_TARGET_PPC64:
2284 return CPU_INFO_ARCH_PPC;
2286 case SYS_EMU_TARGET_SPARC:
2287 case SYS_EMU_TARGET_SPARC64:
2288 return CPU_INFO_ARCH_SPARC;
2290 case SYS_EMU_TARGET_MIPS:
2291 case SYS_EMU_TARGET_MIPSEL:
2292 case SYS_EMU_TARGET_MIPS64:
2293 case SYS_EMU_TARGET_MIPS64EL:
2294 return CPU_INFO_ARCH_MIPS;
2296 case SYS_EMU_TARGET_TRICORE:
2297 return CPU_INFO_ARCH_TRICORE;
2299 case SYS_EMU_TARGET_S390X:
2300 return CPU_INFO_ARCH_S390;
2302 case SYS_EMU_TARGET_RISCV32:
2303 case SYS_EMU_TARGET_RISCV64:
2304 return CPU_INFO_ARCH_RISCV;
2306 default:
2307 return CPU_INFO_ARCH_OTHER;
2311 static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu)
2313 #ifdef TARGET_S390X
2314 S390CPU *s390_cpu = S390_CPU(cpu);
2315 CPUS390XState *env = &s390_cpu->env;
2317 info->cpu_state = env->cpu_state;
2318 #else
2319 abort();
2320 #endif
2324 * fast means: we NEVER interrupt vCPU threads to retrieve
2325 * information from KVM.
2327 CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
2329 MachineState *ms = MACHINE(qdev_get_machine());
2330 MachineClass *mc = MACHINE_GET_CLASS(ms);
2331 CpuInfoFastList *head = NULL, *cur_item = NULL;
2332 SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, TARGET_NAME,
2333 -1, &error_abort);
2334 CPUState *cpu;
2336 CPU_FOREACH(cpu) {
2337 CpuInfoFastList *info = g_malloc0(sizeof(*info));
2338 info->value = g_malloc0(sizeof(*info->value));
2340 info->value->cpu_index = cpu->cpu_index;
2341 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2342 info->value->thread_id = cpu->thread_id;
2344 info->value->has_props = !!mc->cpu_index_to_instance_props;
2345 if (info->value->has_props) {
2346 CpuInstanceProperties *props;
2347 props = g_malloc0(sizeof(*props));
2348 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2349 info->value->props = props;
2352 info->value->arch = sysemu_target_to_cpuinfo_arch(target);
2353 info->value->target = target;
2354 if (target == SYS_EMU_TARGET_S390X) {
2355 cpustate_to_cpuinfo_s390(&info->value->u.s390x, cpu);
2358 if (!cur_item) {
2359 head = cur_item = info;
2360 } else {
2361 cur_item->next = info;
2362 cur_item = info;
2366 return head;
2369 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2370 bool has_cpu, int64_t cpu_index, Error **errp)
2372 FILE *f;
2373 uint32_t l;
2374 CPUState *cpu;
2375 uint8_t buf[1024];
2376 int64_t orig_addr = addr, orig_size = size;
2378 if (!has_cpu) {
2379 cpu_index = 0;
2382 cpu = qemu_get_cpu(cpu_index);
2383 if (cpu == NULL) {
2384 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2385 "a CPU number");
2386 return;
2389 f = fopen(filename, "wb");
2390 if (!f) {
2391 error_setg_file_open(errp, errno, filename);
2392 return;
2395 while (size != 0) {
2396 l = sizeof(buf);
2397 if (l > size)
2398 l = size;
2399 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2400 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2401 " specified", orig_addr, orig_size);
2402 goto exit;
2404 if (fwrite(buf, 1, l, f) != l) {
2405 error_setg(errp, QERR_IO_ERROR);
2406 goto exit;
2408 addr += l;
2409 size -= l;
2412 exit:
2413 fclose(f);
2416 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2417 Error **errp)
2419 FILE *f;
2420 uint32_t l;
2421 uint8_t buf[1024];
2423 f = fopen(filename, "wb");
2424 if (!f) {
2425 error_setg_file_open(errp, errno, filename);
2426 return;
2429 while (size != 0) {
2430 l = sizeof(buf);
2431 if (l > size)
2432 l = size;
2433 cpu_physical_memory_read(addr, buf, l);
2434 if (fwrite(buf, 1, l, f) != l) {
2435 error_setg(errp, QERR_IO_ERROR);
2436 goto exit;
2438 addr += l;
2439 size -= l;
2442 exit:
2443 fclose(f);
2446 void qmp_inject_nmi(Error **errp)
2448 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2451 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2453 if (!use_icount) {
2454 return;
2457 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
2458 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2459 if (icount_align_option) {
2460 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
2461 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
2462 } else {
2463 cpu_fprintf(f, "Max guest delay NA\n");
2464 cpu_fprintf(f, "Max guest advance NA\n");