target/i386: move cpu_T0 to DisasContext
[qemu/ar7.git] / cpus.c
blobd8b3b46cc80396bc9cf65fb97cb002d1725f985e
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "qemu/osdep.h"
26 #include "qemu/config-file.h"
27 #include "cpu.h"
28 #include "monitor/monitor.h"
29 #include "qapi/error.h"
30 #include "qapi/qapi-commands-misc.h"
31 #include "qapi/qapi-events-run-state.h"
32 #include "qapi/qmp/qerror.h"
33 #include "qemu/error-report.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/block-backend.h"
36 #include "exec/gdbstub.h"
37 #include "sysemu/dma.h"
38 #include "sysemu/hw_accel.h"
39 #include "sysemu/kvm.h"
40 #include "sysemu/hax.h"
41 #include "sysemu/hvf.h"
42 #include "sysemu/whpx.h"
43 #include "exec/exec-all.h"
45 #include "qemu/thread.h"
46 #include "sysemu/cpus.h"
47 #include "sysemu/qtest.h"
48 #include "qemu/main-loop.h"
49 #include "qemu/option.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/seqlock.h"
52 #include "tcg.h"
53 #include "hw/nmi.h"
54 #include "sysemu/replay.h"
55 #include "hw/boards.h"
57 #ifdef CONFIG_LINUX
59 #include <sys/prctl.h>
61 #ifndef PR_MCE_KILL
62 #define PR_MCE_KILL 33
63 #endif
65 #ifndef PR_MCE_KILL_SET
66 #define PR_MCE_KILL_SET 1
67 #endif
69 #ifndef PR_MCE_KILL_EARLY
70 #define PR_MCE_KILL_EARLY 1
71 #endif
73 #endif /* CONFIG_LINUX */
75 int64_t max_delay;
76 int64_t max_advance;
78 /* vcpu throttling controls */
79 static QEMUTimer *throttle_timer;
80 static unsigned int throttle_percentage;
82 #define CPU_THROTTLE_PCT_MIN 1
83 #define CPU_THROTTLE_PCT_MAX 99
84 #define CPU_THROTTLE_TIMESLICE_NS 10000000
86 bool cpu_is_stopped(CPUState *cpu)
88 return cpu->stopped || !runstate_is_running();
91 static bool cpu_thread_is_idle(CPUState *cpu)
93 if (cpu->stop || cpu->queued_work_first) {
94 return false;
96 if (cpu_is_stopped(cpu)) {
97 return true;
99 if (!cpu->halted || cpu_has_work(cpu) ||
100 kvm_halt_in_kernel()) {
101 return false;
103 return true;
106 static bool all_cpu_threads_idle(void)
108 CPUState *cpu;
110 CPU_FOREACH(cpu) {
111 if (!cpu_thread_is_idle(cpu)) {
112 return false;
115 return true;
118 /***********************************************************/
119 /* guest cycle counter */
121 /* Protected by TimersState seqlock */
123 static bool icount_sleep = true;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
125 #define MAX_ICOUNT_SHIFT 10
127 typedef struct TimersState {
128 /* Protected by BQL. */
129 int64_t cpu_ticks_prev;
130 int64_t cpu_ticks_offset;
132 /* Protect fields that can be respectively read outside the
133 * BQL, and written from multiple threads.
135 QemuSeqLock vm_clock_seqlock;
136 QemuSpin vm_clock_lock;
138 int16_t cpu_ticks_enabled;
140 /* Conversion factor from emulated instructions to virtual clock ticks. */
141 int16_t icount_time_shift;
143 /* Compensate for varying guest execution speed. */
144 int64_t qemu_icount_bias;
146 int64_t vm_clock_warp_start;
147 int64_t cpu_clock_offset;
149 /* Only written by TCG thread */
150 int64_t qemu_icount;
152 /* for adjusting icount */
153 QEMUTimer *icount_rt_timer;
154 QEMUTimer *icount_vm_timer;
155 QEMUTimer *icount_warp_timer;
156 } TimersState;
158 static TimersState timers_state;
bool mttcg_enabled;

/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *   - atomic instructions
 *   - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

/* Returns true when every ordering bit the guest requires by default is
 * also provided by the host; false when either side does not declare
 * its default memory order.
 */
static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}
190 static bool default_mttcg_enabled(void)
192 if (use_icount || TCG_OVERSIZED_GUEST) {
193 return false;
194 } else {
195 #ifdef TARGET_SUPPORTS_MTTCG
196 return check_tcg_memory_orders_compatible();
197 #else
198 return false;
199 #endif
203 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
205 const char *t = qemu_opt_get(opts, "thread");
206 if (t) {
207 if (strcmp(t, "multi") == 0) {
208 if (TCG_OVERSIZED_GUEST) {
209 error_setg(errp, "No MTTCG when guest word size > hosts");
210 } else if (use_icount) {
211 error_setg(errp, "No MTTCG when icount is enabled");
212 } else {
213 #ifndef TARGET_SUPPORTS_MTTCG
214 error_report("Guest not yet converted to MTTCG - "
215 "you may get unexpected results");
216 #endif
217 if (!check_tcg_memory_orders_compatible()) {
218 error_report("Guest expects a stronger memory ordering "
219 "than the host provides");
220 error_printf("This may cause strange/hard to debug errors\n");
222 mttcg_enabled = true;
224 } else if (strcmp(t, "single") == 0) {
225 mttcg_enabled = false;
226 } else {
227 error_setg(errp, "Invalid 'thread' setting %s", t);
229 } else {
230 mttcg_enabled = default_mttcg_enabled();
234 /* The current number of executed instructions is based on what we
235 * originally budgeted minus the current state of the decrementing
236 * icount counters in extra/u16.low.
238 static int64_t cpu_get_icount_executed(CPUState *cpu)
240 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
244 * Update the global shared timer_state.qemu_icount to take into
245 * account executed instructions. This is done by the TCG vCPU
246 * thread so the main-loop can see time has moved forward.
248 static void cpu_update_icount_locked(CPUState *cpu)
250 int64_t executed = cpu_get_icount_executed(cpu);
251 cpu->icount_budget -= executed;
253 atomic_set_i64(&timers_state.qemu_icount,
254 timers_state.qemu_icount + executed);
258 * Update the global shared timer_state.qemu_icount to take into
259 * account executed instructions. This is done by the TCG vCPU
260 * thread so the main-loop can see time has moved forward.
262 void cpu_update_icount(CPUState *cpu)
264 seqlock_write_lock(&timers_state.vm_clock_seqlock,
265 &timers_state.vm_clock_lock);
266 cpu_update_icount_locked(cpu);
267 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
268 &timers_state.vm_clock_lock);
271 static int64_t cpu_get_icount_raw_locked(void)
273 CPUState *cpu = current_cpu;
275 if (cpu && cpu->running) {
276 if (!cpu->can_do_io) {
277 error_report("Bad icount read");
278 exit(1);
280 /* Take into account what has run */
281 cpu_update_icount_locked(cpu);
283 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
284 return atomic_read_i64(&timers_state.qemu_icount);
287 static int64_t cpu_get_icount_locked(void)
289 int64_t icount = cpu_get_icount_raw_locked();
290 return atomic_read_i64(&timers_state.qemu_icount_bias) +
291 cpu_icount_to_ns(icount);
294 int64_t cpu_get_icount_raw(void)
296 int64_t icount;
297 unsigned start;
299 do {
300 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
301 icount = cpu_get_icount_raw_locked();
302 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
304 return icount;
307 /* Return the virtual CPU time, based on the instruction counter. */
308 int64_t cpu_get_icount(void)
310 int64_t icount;
311 unsigned start;
313 do {
314 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
315 icount = cpu_get_icount_locked();
316 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
318 return icount;
321 int64_t cpu_icount_to_ns(int64_t icount)
323 return icount << atomic_read(&timers_state.icount_time_shift);
326 static int64_t cpu_get_ticks_locked(void)
328 int64_t ticks = timers_state.cpu_ticks_offset;
329 if (timers_state.cpu_ticks_enabled) {
330 ticks += cpu_get_host_ticks();
333 if (timers_state.cpu_ticks_prev > ticks) {
334 /* Non increasing ticks may happen if the host uses software suspend. */
335 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
336 ticks = timers_state.cpu_ticks_prev;
339 timers_state.cpu_ticks_prev = ticks;
340 return ticks;
343 /* return the time elapsed in VM between vm_start and vm_stop. Unless
344 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
345 * counter.
347 int64_t cpu_get_ticks(void)
349 int64_t ticks;
351 if (use_icount) {
352 return cpu_get_icount();
355 qemu_spin_lock(&timers_state.vm_clock_lock);
356 ticks = cpu_get_ticks_locked();
357 qemu_spin_unlock(&timers_state.vm_clock_lock);
358 return ticks;
361 static int64_t cpu_get_clock_locked(void)
363 int64_t time;
365 time = timers_state.cpu_clock_offset;
366 if (timers_state.cpu_ticks_enabled) {
367 time += get_clock();
370 return time;
373 /* Return the monotonic time elapsed in VM, i.e.,
374 * the time between vm_start and vm_stop
376 int64_t cpu_get_clock(void)
378 int64_t ti;
379 unsigned start;
381 do {
382 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
383 ti = cpu_get_clock_locked();
384 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
386 return ti;
389 /* enable cpu_get_ticks()
390 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
392 void cpu_enable_ticks(void)
394 seqlock_write_lock(&timers_state.vm_clock_seqlock,
395 &timers_state.vm_clock_lock);
396 if (!timers_state.cpu_ticks_enabled) {
397 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
398 timers_state.cpu_clock_offset -= get_clock();
399 timers_state.cpu_ticks_enabled = 1;
401 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
402 &timers_state.vm_clock_lock);
405 /* disable cpu_get_ticks() : the clock is stopped. You must not call
406 * cpu_get_ticks() after that.
407 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
409 void cpu_disable_ticks(void)
411 seqlock_write_lock(&timers_state.vm_clock_seqlock,
412 &timers_state.vm_clock_lock);
413 if (timers_state.cpu_ticks_enabled) {
414 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
415 timers_state.cpu_clock_offset = cpu_get_clock_locked();
416 timers_state.cpu_ticks_enabled = 0;
418 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
419 &timers_state.vm_clock_lock);
422 /* Correlation between real and virtual time is always going to be
423 fairly approximate, so ignore small variation.
424 When the guest is idle real and virtual time will be aligned in
425 the IO wait loop. */
426 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
428 static void icount_adjust(void)
430 int64_t cur_time;
431 int64_t cur_icount;
432 int64_t delta;
434 /* Protected by TimersState mutex. */
435 static int64_t last_delta;
437 /* If the VM is not running, then do nothing. */
438 if (!runstate_is_running()) {
439 return;
442 seqlock_write_lock(&timers_state.vm_clock_seqlock,
443 &timers_state.vm_clock_lock);
444 cur_time = cpu_get_clock_locked();
445 cur_icount = cpu_get_icount_locked();
447 delta = cur_icount - cur_time;
448 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
449 if (delta > 0
450 && last_delta + ICOUNT_WOBBLE < delta * 2
451 && timers_state.icount_time_shift > 0) {
452 /* The guest is getting too far ahead. Slow time down. */
453 atomic_set(&timers_state.icount_time_shift,
454 timers_state.icount_time_shift - 1);
456 if (delta < 0
457 && last_delta - ICOUNT_WOBBLE > delta * 2
458 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
459 /* The guest is getting too far behind. Speed time up. */
460 atomic_set(&timers_state.icount_time_shift,
461 timers_state.icount_time_shift + 1);
463 last_delta = delta;
464 atomic_set_i64(&timers_state.qemu_icount_bias,
465 cur_icount - (timers_state.qemu_icount
466 << timers_state.icount_time_shift));
467 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
468 &timers_state.vm_clock_lock);
471 static void icount_adjust_rt(void *opaque)
473 timer_mod(timers_state.icount_rt_timer,
474 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
475 icount_adjust();
478 static void icount_adjust_vm(void *opaque)
480 timer_mod(timers_state.icount_vm_timer,
481 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
482 NANOSECONDS_PER_SECOND / 10);
483 icount_adjust();
486 static int64_t qemu_icount_round(int64_t count)
488 int shift = atomic_read(&timers_state.icount_time_shift);
489 return (count + (1 << shift) - 1) >> shift;
492 static void icount_warp_rt(void)
494 unsigned seq;
495 int64_t warp_start;
497 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
498 * changes from -1 to another value, so the race here is okay.
500 do {
501 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
502 warp_start = timers_state.vm_clock_warp_start;
503 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
505 if (warp_start == -1) {
506 return;
509 seqlock_write_lock(&timers_state.vm_clock_seqlock,
510 &timers_state.vm_clock_lock);
511 if (runstate_is_running()) {
512 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
513 cpu_get_clock_locked());
514 int64_t warp_delta;
516 warp_delta = clock - timers_state.vm_clock_warp_start;
517 if (use_icount == 2) {
519 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
520 * far ahead of real time.
522 int64_t cur_icount = cpu_get_icount_locked();
523 int64_t delta = clock - cur_icount;
524 warp_delta = MIN(warp_delta, delta);
526 atomic_set_i64(&timers_state.qemu_icount_bias,
527 timers_state.qemu_icount_bias + warp_delta);
529 timers_state.vm_clock_warp_start = -1;
530 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
531 &timers_state.vm_clock_lock);
533 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
534 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
/* Callback of icount_warp_timer; @opaque is unused. */
static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
546 void qtest_clock_warp(int64_t dest)
548 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
549 AioContext *aio_context;
550 assert(qtest_enabled());
551 aio_context = qemu_get_aio_context();
552 while (clock < dest) {
553 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
554 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
556 seqlock_write_lock(&timers_state.vm_clock_seqlock,
557 &timers_state.vm_clock_lock);
558 atomic_set_i64(&timers_state.qemu_icount_bias,
559 timers_state.qemu_icount_bias + warp);
560 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
561 &timers_state.vm_clock_lock);
563 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
564 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
565 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
567 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
570 void qemu_start_warp_timer(void)
572 int64_t clock;
573 int64_t deadline;
575 if (!use_icount) {
576 return;
579 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
580 * do not fire, so computing the deadline does not make sense.
582 if (!runstate_is_running()) {
583 return;
586 /* warp clock deterministically in record/replay mode */
587 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
588 return;
591 if (!all_cpu_threads_idle()) {
592 return;
595 if (qtest_enabled()) {
596 /* When testing, qtest commands advance icount. */
597 return;
600 /* We want to use the earliest deadline from ALL vm_clocks */
601 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
602 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
603 if (deadline < 0) {
604 static bool notified;
605 if (!icount_sleep && !notified) {
606 warn_report("icount sleep disabled and no active timers");
607 notified = true;
609 return;
612 if (deadline > 0) {
614 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
615 * sleep. Otherwise, the CPU might be waiting for a future timer
616 * interrupt to wake it up, but the interrupt never comes because
617 * the vCPU isn't running any insns and thus doesn't advance the
618 * QEMU_CLOCK_VIRTUAL.
620 if (!icount_sleep) {
622 * We never let VCPUs sleep in no sleep icount mode.
623 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
624 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
625 * It is useful when we want a deterministic execution time,
626 * isolated from host latencies.
628 seqlock_write_lock(&timers_state.vm_clock_seqlock,
629 &timers_state.vm_clock_lock);
630 atomic_set_i64(&timers_state.qemu_icount_bias,
631 timers_state.qemu_icount_bias + deadline);
632 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
633 &timers_state.vm_clock_lock);
634 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
635 } else {
637 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
638 * "real" time, (related to the time left until the next event) has
639 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
640 * This avoids that the warps are visible externally; for example,
641 * you will not be sending network packets continuously instead of
642 * every 100ms.
644 seqlock_write_lock(&timers_state.vm_clock_seqlock,
645 &timers_state.vm_clock_lock);
646 if (timers_state.vm_clock_warp_start == -1
647 || timers_state.vm_clock_warp_start > clock) {
648 timers_state.vm_clock_warp_start = clock;
650 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
651 &timers_state.vm_clock_lock);
652 timer_mod_anticipate(timers_state.icount_warp_timer,
653 clock + deadline);
655 } else if (deadline == 0) {
656 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
660 static void qemu_account_warp_timer(void)
662 if (!use_icount || !icount_sleep) {
663 return;
666 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
667 * do not fire, so computing the deadline does not make sense.
669 if (!runstate_is_running()) {
670 return;
673 /* warp clock deterministically in record/replay mode */
674 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
675 return;
678 timer_del(timers_state.icount_warp_timer);
679 icount_warp_rt();
682 static bool icount_state_needed(void *opaque)
684 return use_icount;
687 static bool warp_timer_state_needed(void *opaque)
689 TimersState *s = opaque;
690 return s->icount_warp_timer != NULL;
693 static bool adjust_timers_state_needed(void *opaque)
695 TimersState *s = opaque;
696 return s->icount_rt_timer != NULL;
700 * Subsection for warp timer migration is optional, because may not be created
702 static const VMStateDescription icount_vmstate_warp_timer = {
703 .name = "timer/icount/warp_timer",
704 .version_id = 1,
705 .minimum_version_id = 1,
706 .needed = warp_timer_state_needed,
707 .fields = (VMStateField[]) {
708 VMSTATE_INT64(vm_clock_warp_start, TimersState),
709 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
710 VMSTATE_END_OF_LIST()
714 static const VMStateDescription icount_vmstate_adjust_timers = {
715 .name = "timer/icount/timers",
716 .version_id = 1,
717 .minimum_version_id = 1,
718 .needed = adjust_timers_state_needed,
719 .fields = (VMStateField[]) {
720 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
721 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
722 VMSTATE_END_OF_LIST()
727 * This is a subsection for icount migration.
729 static const VMStateDescription icount_vmstate_timers = {
730 .name = "timer/icount",
731 .version_id = 1,
732 .minimum_version_id = 1,
733 .needed = icount_state_needed,
734 .fields = (VMStateField[]) {
735 VMSTATE_INT64(qemu_icount_bias, TimersState),
736 VMSTATE_INT64(qemu_icount, TimersState),
737 VMSTATE_END_OF_LIST()
739 .subsections = (const VMStateDescription*[]) {
740 &icount_vmstate_warp_timer,
741 &icount_vmstate_adjust_timers,
742 NULL
746 static const VMStateDescription vmstate_timers = {
747 .name = "timer",
748 .version_id = 2,
749 .minimum_version_id = 1,
750 .fields = (VMStateField[]) {
751 VMSTATE_INT64(cpu_ticks_offset, TimersState),
752 VMSTATE_UNUSED(8),
753 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
754 VMSTATE_END_OF_LIST()
756 .subsections = (const VMStateDescription*[]) {
757 &icount_vmstate_timers,
758 NULL
762 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
764 double pct;
765 double throttle_ratio;
766 long sleeptime_ns;
768 if (!cpu_throttle_get_percentage()) {
769 return;
772 pct = (double)cpu_throttle_get_percentage()/100;
773 throttle_ratio = pct / (1 - pct);
774 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
776 qemu_mutex_unlock_iothread();
777 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
778 qemu_mutex_lock_iothread();
779 atomic_set(&cpu->throttle_thread_scheduled, 0);
782 static void cpu_throttle_timer_tick(void *opaque)
784 CPUState *cpu;
785 double pct;
787 /* Stop the timer if needed */
788 if (!cpu_throttle_get_percentage()) {
789 return;
791 CPU_FOREACH(cpu) {
792 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
793 async_run_on_cpu(cpu, cpu_throttle_thread,
794 RUN_ON_CPU_NULL);
798 pct = (double)cpu_throttle_get_percentage()/100;
799 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
800 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
803 void cpu_throttle_set(int new_throttle_pct)
805 /* Ensure throttle percentage is within valid range */
806 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
807 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
809 atomic_set(&throttle_percentage, new_throttle_pct);
811 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
812 CPU_THROTTLE_TIMESLICE_NS);
815 void cpu_throttle_stop(void)
817 atomic_set(&throttle_percentage, 0);
/* True while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
825 int cpu_throttle_get_percentage(void)
827 return atomic_read(&throttle_percentage);
830 void cpu_ticks_init(void)
832 seqlock_init(&timers_state.vm_clock_seqlock);
833 qemu_spin_init(&timers_state.vm_clock_lock);
834 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
835 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
836 cpu_throttle_timer_tick, NULL);
839 void configure_icount(QemuOpts *opts, Error **errp)
841 const char *option;
842 char *rem_str = NULL;
844 option = qemu_opt_get(opts, "shift");
845 if (!option) {
846 if (qemu_opt_get(opts, "align") != NULL) {
847 error_setg(errp, "Please specify shift option when using align");
849 return;
852 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
853 if (icount_sleep) {
854 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
855 icount_timer_cb, NULL);
858 icount_align_option = qemu_opt_get_bool(opts, "align", false);
860 if (icount_align_option && !icount_sleep) {
861 error_setg(errp, "align=on and sleep=off are incompatible");
863 if (strcmp(option, "auto") != 0) {
864 errno = 0;
865 timers_state.icount_time_shift = strtol(option, &rem_str, 0);
866 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
867 error_setg(errp, "icount: Invalid shift value");
869 use_icount = 1;
870 return;
871 } else if (icount_align_option) {
872 error_setg(errp, "shift=auto and align=on are incompatible");
873 } else if (!icount_sleep) {
874 error_setg(errp, "shift=auto and sleep=off are incompatible");
877 use_icount = 2;
879 /* 125MIPS seems a reasonable initial guess at the guest speed.
880 It will be corrected fairly quickly anyway. */
881 timers_state.icount_time_shift = 3;
883 /* Have both realtime and virtual time triggers for speed adjustment.
884 The realtime trigger catches emulated time passing too slowly,
885 the virtual time trigger catches emulated time passing too fast.
886 Realtime triggers occur even when idle, so use them less frequently
887 than VM triggers. */
888 timers_state.vm_clock_warp_start = -1;
889 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
890 icount_adjust_rt, NULL);
891 timer_mod(timers_state.icount_rt_timer,
892 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
893 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
894 icount_adjust_vm, NULL);
895 timer_mod(timers_state.icount_vm_timer,
896 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
897 NANOSECONDS_PER_SECOND / 10);
900 /***********************************************************/
901 /* TCG vCPU kick timer
903 * The kick timer is responsible for moving single threaded vCPU
904 * emulation on to the next vCPU. If more than one vCPU is running a
905 * timer event with force a cpu->exit so the next vCPU can get
906 * scheduled.
908 * The timer is removed if all vCPUs are idle and restarted again once
909 * idleness is complete.
912 static QEMUTimer *tcg_kick_vcpu_timer;
913 static CPUState *tcg_current_rr_cpu;
915 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
917 static inline int64_t qemu_tcg_next_kick(void)
919 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
922 /* Kick the currently round-robin scheduled vCPU */
923 static void qemu_cpu_kick_rr_cpu(void)
925 CPUState *cpu;
926 do {
927 cpu = atomic_mb_read(&tcg_current_rr_cpu);
928 if (cpu) {
929 cpu_exit(cpu);
931 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
934 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
938 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
940 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
941 qemu_notify_event();
942 return;
945 if (qemu_in_vcpu_thread()) {
946 /* A CPU is currently running; kick it back out to the
947 * tcg_cpu_exec() loop so it will recalculate its
948 * icount deadline immediately.
950 qemu_cpu_kick(current_cpu);
951 } else if (first_cpu) {
952 /* qemu_cpu_kick is not enough to kick a halted CPU out of
953 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
954 * causes cpu_thread_is_idle to return false. This way,
955 * handle_icount_deadline can run.
956 * If we have no CPUs at all for some reason, we don't
957 * need to do anything.
959 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
963 static void kick_tcg_thread(void *opaque)
965 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
966 qemu_cpu_kick_rr_cpu();
969 static void start_tcg_kick_timer(void)
971 assert(!mttcg_enabled);
972 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
973 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
974 kick_tcg_thread, NULL);
975 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
979 static void stop_tcg_kick_timer(void)
981 assert(!mttcg_enabled);
982 if (tcg_kick_vcpu_timer) {
983 timer_del(tcg_kick_vcpu_timer);
984 tcg_kick_vcpu_timer = NULL;
988 /***********************************************************/
989 void hw_error(const char *fmt, ...)
991 va_list ap;
992 CPUState *cpu;
994 va_start(ap, fmt);
995 fprintf(stderr, "qemu: hardware error: ");
996 vfprintf(stderr, fmt, ap);
997 fprintf(stderr, "\n");
998 CPU_FOREACH(cpu) {
999 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
1000 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
1002 va_end(ap);
1003 abort();
1006 void cpu_synchronize_all_states(void)
1008 CPUState *cpu;
1010 CPU_FOREACH(cpu) {
1011 cpu_synchronize_state(cpu);
1012 /* TODO: move to cpu_synchronize_state() */
1013 if (hvf_enabled()) {
1014 hvf_cpu_synchronize_state(cpu);
1019 void cpu_synchronize_all_post_reset(void)
1021 CPUState *cpu;
1023 CPU_FOREACH(cpu) {
1024 cpu_synchronize_post_reset(cpu);
1025 /* TODO: move to cpu_synchronize_post_reset() */
1026 if (hvf_enabled()) {
1027 hvf_cpu_synchronize_post_reset(cpu);
1032 void cpu_synchronize_all_post_init(void)
1034 CPUState *cpu;
1036 CPU_FOREACH(cpu) {
1037 cpu_synchronize_post_init(cpu);
1038 /* TODO: move to cpu_synchronize_post_init() */
1039 if (hvf_enabled()) {
1040 hvf_cpu_synchronize_post_init(cpu);
1045 void cpu_synchronize_all_pre_loadvm(void)
1047 CPUState *cpu;
1049 CPU_FOREACH(cpu) {
1050 cpu_synchronize_pre_loadvm(cpu);
1054 static int do_vm_stop(RunState state, bool send_stop)
1056 int ret = 0;
1058 if (runstate_is_running()) {
1059 cpu_disable_ticks();
1060 pause_all_vcpus();
1061 runstate_set(state);
1062 vm_state_notify(0, state);
1063 if (send_stop) {
1064 qapi_event_send_stop();
1068 bdrv_drain_all();
1069 replay_disable_events();
1070 ret = bdrv_flush_all();
1072 return ret;
1075 /* Special vm_stop() variant for terminating the process. Historically clients
1076 * did not expect a QMP STOP event and so we need to retain compatibility.
1078 int vm_shutdown(void)
1080 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
1083 static bool cpu_can_run(CPUState *cpu)
1085 if (cpu->stop) {
1086 return false;
1088 if (cpu_is_stopped(cpu)) {
1089 return false;
1091 return true;
1094 static void cpu_handle_guest_debug(CPUState *cpu)
1096 gdb_set_stop_cpu(cpu);
1097 qemu_system_debug_request();
1098 cpu->stopped = true;
1101 #ifdef CONFIG_LINUX
1102 static void sigbus_reraise(void)
1104 sigset_t set;
1105 struct sigaction action;
1107 memset(&action, 0, sizeof(action));
1108 action.sa_handler = SIG_DFL;
1109 if (!sigaction(SIGBUS, &action, NULL)) {
1110 raise(SIGBUS);
1111 sigemptyset(&set);
1112 sigaddset(&set, SIGBUS);
1113 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1115 perror("Failed to re-raise SIGBUS!\n");
1116 abort();
1119 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1121 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1122 sigbus_reraise();
1125 if (current_cpu) {
1126 /* Called asynchronously in VCPU thread. */
1127 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1128 sigbus_reraise();
1130 } else {
1131 /* Called synchronously (via signalfd) in main thread. */
1132 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1133 sigbus_reraise();
1138 static void qemu_init_sigbus(void)
1140 struct sigaction action;
1142 memset(&action, 0, sizeof(action));
1143 action.sa_flags = SA_SIGINFO;
1144 action.sa_sigaction = sigbus_handler;
1145 sigaction(SIGBUS, &action, NULL);
1147 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1149 #else /* !CONFIG_LINUX */
/* No SIGBUS setup is done when CONFIG_LINUX is not defined. */
static void qemu_init_sigbus(void)
{
}
1153 #endif /* !CONFIG_LINUX */
1155 static QemuMutex qemu_global_mutex;
1157 static QemuThread io_thread;
1159 /* cpu creation */
1160 static QemuCond qemu_cpu_cond;
1161 /* system init */
1162 static QemuCond qemu_pause_cond;
1164 void qemu_init_cpu_loop(void)
1166 qemu_init_sigbus();
1167 qemu_cond_init(&qemu_cpu_cond);
1168 qemu_cond_init(&qemu_pause_cond);
1169 qemu_mutex_init(&qemu_global_mutex);
1171 qemu_thread_get_self(&io_thread);
1174 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1176 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1179 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1181 if (kvm_destroy_vcpu(cpu) < 0) {
1182 error_report("kvm_destroy_vcpu failed");
1183 exit(EXIT_FAILURE);
1187 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1191 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1193 g_assert(qemu_cpu_is_self(cpu));
1194 cpu->stop = false;
1195 cpu->stopped = true;
1196 if (exit) {
1197 cpu_exit(cpu);
1199 qemu_cond_broadcast(&qemu_pause_cond);
1202 static void qemu_wait_io_event_common(CPUState *cpu)
1204 atomic_mb_set(&cpu->thread_kicked, false);
1205 if (cpu->stop) {
1206 qemu_cpu_stop(cpu, false);
1208 process_queued_cpu_work(cpu);
1211 static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
1213 while (all_cpu_threads_idle()) {
1214 stop_tcg_kick_timer();
1215 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1218 start_tcg_kick_timer();
1220 qemu_wait_io_event_common(cpu);
1223 static void qemu_wait_io_event(CPUState *cpu)
1225 while (cpu_thread_is_idle(cpu)) {
1226 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1229 #ifdef _WIN32
1230 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1231 if (!tcg_enabled()) {
1232 SleepEx(0, TRUE);
1234 #endif
1235 qemu_wait_io_event_common(cpu);
1238 static void *qemu_kvm_cpu_thread_fn(void *arg)
1240 CPUState *cpu = arg;
1241 int r;
1243 rcu_register_thread();
1245 qemu_mutex_lock_iothread();
1246 qemu_thread_get_self(cpu->thread);
1247 cpu->thread_id = qemu_get_thread_id();
1248 cpu->can_do_io = 1;
1249 current_cpu = cpu;
1251 r = kvm_init_vcpu(cpu);
1252 if (r < 0) {
1253 error_report("kvm_init_vcpu failed: %s", strerror(-r));
1254 exit(1);
1257 kvm_init_cpu_signals(cpu);
1259 /* signal CPU creation */
1260 cpu->created = true;
1261 qemu_cond_signal(&qemu_cpu_cond);
1263 do {
1264 if (cpu_can_run(cpu)) {
1265 r = kvm_cpu_exec(cpu);
1266 if (r == EXCP_DEBUG) {
1267 cpu_handle_guest_debug(cpu);
1270 qemu_wait_io_event(cpu);
1271 } while (!cpu->unplug || cpu_can_run(cpu));
1273 qemu_kvm_destroy_vcpu(cpu);
1274 cpu->created = false;
1275 qemu_cond_signal(&qemu_cpu_cond);
1276 qemu_mutex_unlock_iothread();
1277 rcu_unregister_thread();
1278 return NULL;
1281 static void *qemu_dummy_cpu_thread_fn(void *arg)
1283 #ifdef _WIN32
1284 error_report("qtest is not supported under Windows");
1285 exit(1);
1286 #else
1287 CPUState *cpu = arg;
1288 sigset_t waitset;
1289 int r;
1291 rcu_register_thread();
1293 qemu_mutex_lock_iothread();
1294 qemu_thread_get_self(cpu->thread);
1295 cpu->thread_id = qemu_get_thread_id();
1296 cpu->can_do_io = 1;
1297 current_cpu = cpu;
1299 sigemptyset(&waitset);
1300 sigaddset(&waitset, SIG_IPI);
1302 /* signal CPU creation */
1303 cpu->created = true;
1304 qemu_cond_signal(&qemu_cpu_cond);
1306 do {
1307 qemu_mutex_unlock_iothread();
1308 do {
1309 int sig;
1310 r = sigwait(&waitset, &sig);
1311 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1312 if (r == -1) {
1313 perror("sigwait");
1314 exit(1);
1316 qemu_mutex_lock_iothread();
1317 qemu_wait_io_event(cpu);
1318 } while (!cpu->unplug);
1320 rcu_unregister_thread();
1321 return NULL;
1322 #endif
1325 static int64_t tcg_get_icount_limit(void)
1327 int64_t deadline;
1329 if (replay_mode != REPLAY_MODE_PLAY) {
1330 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1332 /* Maintain prior (possibly buggy) behaviour where if no deadline
1333 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1334 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1335 * nanoseconds.
1337 if ((deadline < 0) || (deadline > INT32_MAX)) {
1338 deadline = INT32_MAX;
1341 return qemu_icount_round(deadline);
1342 } else {
1343 return replay_get_instructions();
1347 static void handle_icount_deadline(void)
1349 assert(qemu_in_vcpu_thread());
1350 if (use_icount) {
1351 int64_t deadline =
1352 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1354 if (deadline == 0) {
1355 /* Wake up other AioContexts. */
1356 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1357 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1362 static void prepare_icount_for_run(CPUState *cpu)
1364 if (use_icount) {
1365 int insns_left;
1367 /* These should always be cleared by process_icount_data after
1368 * each vCPU execution. However u16.high can be raised
1369 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1371 g_assert(cpu->icount_decr.u16.low == 0);
1372 g_assert(cpu->icount_extra == 0);
1374 cpu->icount_budget = tcg_get_icount_limit();
1375 insns_left = MIN(0xffff, cpu->icount_budget);
1376 cpu->icount_decr.u16.low = insns_left;
1377 cpu->icount_extra = cpu->icount_budget - insns_left;
1379 replay_mutex_lock();
1383 static void process_icount_data(CPUState *cpu)
1385 if (use_icount) {
1386 /* Account for executed instructions */
1387 cpu_update_icount(cpu);
1389 /* Reset the counters */
1390 cpu->icount_decr.u16.low = 0;
1391 cpu->icount_extra = 0;
1392 cpu->icount_budget = 0;
1394 replay_account_executed_instructions();
1396 replay_mutex_unlock();
1401 static int tcg_cpu_exec(CPUState *cpu)
1403 int ret;
1404 #ifdef CONFIG_PROFILER
1405 int64_t ti;
1406 #endif
1408 assert(tcg_enabled());
1409 #ifdef CONFIG_PROFILER
1410 ti = profile_getclock();
1411 #endif
1412 cpu_exec_start(cpu);
1413 ret = cpu_exec(cpu);
1414 cpu_exec_end(cpu);
1415 #ifdef CONFIG_PROFILER
1416 tcg_time += profile_getclock() - ti;
1417 #endif
1418 return ret;
1421 /* Destroy any remaining vCPUs which have been unplugged and have
1422 * finished running
1424 static void deal_with_unplugged_cpus(void)
1426 CPUState *cpu;
1428 CPU_FOREACH(cpu) {
1429 if (cpu->unplug && !cpu_can_run(cpu)) {
1430 qemu_tcg_destroy_vcpu(cpu);
1431 cpu->created = false;
1432 qemu_cond_signal(&qemu_cpu_cond);
1433 break;
1438 /* Single-threaded TCG
1440 * In the single-threaded case each vCPU is simulated in turn. If
1441 * there is more than a single vCPU we create a simple timer to kick
1442 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1443 * This is done explicitly rather than relying on side-effects
1444 * elsewhere.
1447 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1449 CPUState *cpu = arg;
1451 assert(tcg_enabled());
1452 rcu_register_thread();
1453 tcg_register_thread();
1455 qemu_mutex_lock_iothread();
1456 qemu_thread_get_self(cpu->thread);
1458 cpu->thread_id = qemu_get_thread_id();
1459 cpu->created = true;
1460 cpu->can_do_io = 1;
1461 qemu_cond_signal(&qemu_cpu_cond);
1463 /* wait for initial kick-off after machine start */
1464 while (first_cpu->stopped) {
1465 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1467 /* process any pending work */
1468 CPU_FOREACH(cpu) {
1469 current_cpu = cpu;
1470 qemu_wait_io_event_common(cpu);
1474 start_tcg_kick_timer();
1476 cpu = first_cpu;
1478 /* process any pending work */
1479 cpu->exit_request = 1;
1481 while (1) {
1482 qemu_mutex_unlock_iothread();
1483 replay_mutex_lock();
1484 qemu_mutex_lock_iothread();
1485 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1486 qemu_account_warp_timer();
1488 /* Run the timers here. This is much more efficient than
1489 * waking up the I/O thread and waiting for completion.
1491 handle_icount_deadline();
1493 replay_mutex_unlock();
1495 if (!cpu) {
1496 cpu = first_cpu;
1499 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1501 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1502 current_cpu = cpu;
1504 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1505 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1507 if (cpu_can_run(cpu)) {
1508 int r;
1510 qemu_mutex_unlock_iothread();
1511 prepare_icount_for_run(cpu);
1513 r = tcg_cpu_exec(cpu);
1515 process_icount_data(cpu);
1516 qemu_mutex_lock_iothread();
1518 if (r == EXCP_DEBUG) {
1519 cpu_handle_guest_debug(cpu);
1520 break;
1521 } else if (r == EXCP_ATOMIC) {
1522 qemu_mutex_unlock_iothread();
1523 cpu_exec_step_atomic(cpu);
1524 qemu_mutex_lock_iothread();
1525 break;
1527 } else if (cpu->stop) {
1528 if (cpu->unplug) {
1529 cpu = CPU_NEXT(cpu);
1531 break;
1534 cpu = CPU_NEXT(cpu);
1535 } /* while (cpu && !cpu->exit_request).. */
1537 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1538 atomic_set(&tcg_current_rr_cpu, NULL);
1540 if (cpu && cpu->exit_request) {
1541 atomic_mb_set(&cpu->exit_request, 0);
1544 qemu_tcg_rr_wait_io_event(cpu ? cpu : first_cpu);
1545 deal_with_unplugged_cpus();
1548 rcu_unregister_thread();
1549 return NULL;
1552 static void *qemu_hax_cpu_thread_fn(void *arg)
1554 CPUState *cpu = arg;
1555 int r;
1557 rcu_register_thread();
1558 qemu_mutex_lock_iothread();
1559 qemu_thread_get_self(cpu->thread);
1561 cpu->thread_id = qemu_get_thread_id();
1562 cpu->created = true;
1563 cpu->halted = 0;
1564 current_cpu = cpu;
1566 hax_init_vcpu(cpu);
1567 qemu_cond_signal(&qemu_cpu_cond);
1569 do {
1570 if (cpu_can_run(cpu)) {
1571 r = hax_smp_cpu_exec(cpu);
1572 if (r == EXCP_DEBUG) {
1573 cpu_handle_guest_debug(cpu);
1577 qemu_wait_io_event(cpu);
1578 } while (!cpu->unplug || cpu_can_run(cpu));
1579 rcu_unregister_thread();
1580 return NULL;
1583 /* The HVF-specific vCPU thread function. This one should only run when the host
1584 * CPU supports the VMX "unrestricted guest" feature. */
1585 static void *qemu_hvf_cpu_thread_fn(void *arg)
1587 CPUState *cpu = arg;
1589 int r;
1591 assert(hvf_enabled());
1593 rcu_register_thread();
1595 qemu_mutex_lock_iothread();
1596 qemu_thread_get_self(cpu->thread);
1598 cpu->thread_id = qemu_get_thread_id();
1599 cpu->can_do_io = 1;
1600 current_cpu = cpu;
1602 hvf_init_vcpu(cpu);
1604 /* signal CPU creation */
1605 cpu->created = true;
1606 qemu_cond_signal(&qemu_cpu_cond);
1608 do {
1609 if (cpu_can_run(cpu)) {
1610 r = hvf_vcpu_exec(cpu);
1611 if (r == EXCP_DEBUG) {
1612 cpu_handle_guest_debug(cpu);
1615 qemu_wait_io_event(cpu);
1616 } while (!cpu->unplug || cpu_can_run(cpu));
1618 hvf_vcpu_destroy(cpu);
1619 cpu->created = false;
1620 qemu_cond_signal(&qemu_cpu_cond);
1621 qemu_mutex_unlock_iothread();
1622 rcu_unregister_thread();
1623 return NULL;
1626 static void *qemu_whpx_cpu_thread_fn(void *arg)
1628 CPUState *cpu = arg;
1629 int r;
1631 rcu_register_thread();
1633 qemu_mutex_lock_iothread();
1634 qemu_thread_get_self(cpu->thread);
1635 cpu->thread_id = qemu_get_thread_id();
1636 current_cpu = cpu;
1638 r = whpx_init_vcpu(cpu);
1639 if (r < 0) {
1640 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1641 exit(1);
1644 /* signal CPU creation */
1645 cpu->created = true;
1646 qemu_cond_signal(&qemu_cpu_cond);
1648 do {
1649 if (cpu_can_run(cpu)) {
1650 r = whpx_vcpu_exec(cpu);
1651 if (r == EXCP_DEBUG) {
1652 cpu_handle_guest_debug(cpu);
1655 while (cpu_thread_is_idle(cpu)) {
1656 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1658 qemu_wait_io_event_common(cpu);
1659 } while (!cpu->unplug || cpu_can_run(cpu));
1661 whpx_destroy_vcpu(cpu);
1662 cpu->created = false;
1663 qemu_cond_signal(&qemu_cpu_cond);
1664 qemu_mutex_unlock_iothread();
1665 rcu_unregister_thread();
1666 return NULL;
#ifdef _WIN32
/* Empty APC routine queued by qemu_cpu_kick_thread() to kick a vCPU thread. */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif
1675 /* Multi-threaded TCG
1677 * In the multi-threaded case each vCPU has its own thread. The TLS
1678 * variable current_cpu can be used deep in the code to find the
1679 * current CPUState for a given thread.
1682 static void *qemu_tcg_cpu_thread_fn(void *arg)
1684 CPUState *cpu = arg;
1686 assert(tcg_enabled());
1687 g_assert(!use_icount);
1689 rcu_register_thread();
1690 tcg_register_thread();
1692 qemu_mutex_lock_iothread();
1693 qemu_thread_get_self(cpu->thread);
1695 cpu->thread_id = qemu_get_thread_id();
1696 cpu->created = true;
1697 cpu->can_do_io = 1;
1698 current_cpu = cpu;
1699 qemu_cond_signal(&qemu_cpu_cond);
1701 /* process any pending work */
1702 cpu->exit_request = 1;
1704 do {
1705 if (cpu_can_run(cpu)) {
1706 int r;
1707 qemu_mutex_unlock_iothread();
1708 r = tcg_cpu_exec(cpu);
1709 qemu_mutex_lock_iothread();
1710 switch (r) {
1711 case EXCP_DEBUG:
1712 cpu_handle_guest_debug(cpu);
1713 break;
1714 case EXCP_HALTED:
1715 /* during start-up the vCPU is reset and the thread is
1716 * kicked several times. If we don't ensure we go back
1717 * to sleep in the halted state we won't cleanly
1718 * start-up when the vCPU is enabled.
1720 * cpu->halted should ensure we sleep in wait_io_event
1722 g_assert(cpu->halted);
1723 break;
1724 case EXCP_ATOMIC:
1725 qemu_mutex_unlock_iothread();
1726 cpu_exec_step_atomic(cpu);
1727 qemu_mutex_lock_iothread();
1728 default:
1729 /* Ignore everything else? */
1730 break;
1734 atomic_mb_set(&cpu->exit_request, 0);
1735 qemu_wait_io_event(cpu);
1736 } while (!cpu->unplug || cpu_can_run(cpu));
1738 qemu_tcg_destroy_vcpu(cpu);
1739 cpu->created = false;
1740 qemu_cond_signal(&qemu_cpu_cond);
1741 qemu_mutex_unlock_iothread();
1742 rcu_unregister_thread();
1743 return NULL;
1746 static void qemu_cpu_kick_thread(CPUState *cpu)
1748 #ifndef _WIN32
1749 int err;
1751 if (cpu->thread_kicked) {
1752 return;
1754 cpu->thread_kicked = true;
1755 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1756 if (err) {
1757 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1758 exit(1);
1760 #else /* _WIN32 */
1761 if (!qemu_cpu_is_self(cpu)) {
1762 if (whpx_enabled()) {
1763 whpx_vcpu_kick(cpu);
1764 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1765 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1766 __func__, GetLastError());
1767 exit(1);
1770 #endif
1773 void qemu_cpu_kick(CPUState *cpu)
1775 qemu_cond_broadcast(cpu->halt_cond);
1776 if (tcg_enabled()) {
1777 cpu_exit(cpu);
1778 /* NOP unless doing single-thread RR */
1779 qemu_cpu_kick_rr_cpu();
1780 } else {
1781 if (hax_enabled()) {
1783 * FIXME: race condition with the exit_request check in
1784 * hax_vcpu_hax_exec
1786 cpu->exit_request = 1;
1788 qemu_cpu_kick_thread(cpu);
1792 void qemu_cpu_kick_self(void)
1794 assert(current_cpu);
1795 qemu_cpu_kick_thread(current_cpu);
1798 bool qemu_cpu_is_self(CPUState *cpu)
1800 return qemu_thread_is_self(cpu->thread);
1803 bool qemu_in_vcpu_thread(void)
1805 return current_cpu && qemu_cpu_is_self(current_cpu);
/* Per-thread record of whether this thread took the iothread lock */
static __thread bool iothread_locked = false;

/* Return true when the calling thread currently holds the iothread lock. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1816 * The BQL is taken from so many places that it is worth profiling the
1817 * callers directly, instead of funneling them all through a single function.
1819 void qemu_mutex_lock_iothread_impl(const char *file, int line)
1821 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1823 g_assert(!qemu_mutex_iothread_locked());
1824 bql_lock(&qemu_global_mutex, file, line);
1825 iothread_locked = true;
1828 void qemu_mutex_unlock_iothread(void)
1830 g_assert(qemu_mutex_iothread_locked());
1831 iothread_locked = false;
1832 qemu_mutex_unlock(&qemu_global_mutex);
1835 static bool all_vcpus_paused(void)
1837 CPUState *cpu;
1839 CPU_FOREACH(cpu) {
1840 if (!cpu->stopped) {
1841 return false;
1845 return true;
1848 void pause_all_vcpus(void)
1850 CPUState *cpu;
1852 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1853 CPU_FOREACH(cpu) {
1854 if (qemu_cpu_is_self(cpu)) {
1855 qemu_cpu_stop(cpu, true);
1856 } else {
1857 cpu->stop = true;
1858 qemu_cpu_kick(cpu);
1862 /* We need to drop the replay_lock so any vCPU threads woken up
1863 * can finish their replay tasks
1865 replay_mutex_unlock();
1867 while (!all_vcpus_paused()) {
1868 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1869 CPU_FOREACH(cpu) {
1870 qemu_cpu_kick(cpu);
1874 qemu_mutex_unlock_iothread();
1875 replay_mutex_lock();
1876 qemu_mutex_lock_iothread();
1879 void cpu_resume(CPUState *cpu)
1881 cpu->stop = false;
1882 cpu->stopped = false;
1883 qemu_cpu_kick(cpu);
1886 void resume_all_vcpus(void)
1888 CPUState *cpu;
1890 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1891 CPU_FOREACH(cpu) {
1892 cpu_resume(cpu);
1896 void cpu_remove_sync(CPUState *cpu)
1898 cpu->stop = true;
1899 cpu->unplug = true;
1900 qemu_cpu_kick(cpu);
1901 qemu_mutex_unlock_iothread();
1902 qemu_thread_join(cpu->thread);
1903 qemu_mutex_lock_iothread();
/* Size of the temporary buffers used to form a vCPU thread name */
#define VCPU_THREAD_NAME_SIZE 16
1909 static void qemu_tcg_init_vcpu(CPUState *cpu)
1911 char thread_name[VCPU_THREAD_NAME_SIZE];
1912 static QemuCond *single_tcg_halt_cond;
1913 static QemuThread *single_tcg_cpu_thread;
1914 static int tcg_region_inited;
1916 assert(tcg_enabled());
1918 * Initialize TCG regions--once. Now is a good time, because:
1919 * (1) TCG's init context, prologue and target globals have been set up.
1920 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1921 * -accel flag is processed, so the check doesn't work then).
1923 if (!tcg_region_inited) {
1924 tcg_region_inited = 1;
1925 tcg_region_init();
1928 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1929 cpu->thread = g_malloc0(sizeof(QemuThread));
1930 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1931 qemu_cond_init(cpu->halt_cond);
1933 if (qemu_tcg_mttcg_enabled()) {
1934 /* create a thread per vCPU with TCG (MTTCG) */
1935 parallel_cpus = true;
1936 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1937 cpu->cpu_index);
1939 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1940 cpu, QEMU_THREAD_JOINABLE);
1942 } else {
1943 /* share a single thread for all cpus with TCG */
1944 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1945 qemu_thread_create(cpu->thread, thread_name,
1946 qemu_tcg_rr_cpu_thread_fn,
1947 cpu, QEMU_THREAD_JOINABLE);
1949 single_tcg_halt_cond = cpu->halt_cond;
1950 single_tcg_cpu_thread = cpu->thread;
1952 #ifdef _WIN32
1953 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1954 #endif
1955 } else {
1956 /* For non-MTTCG cases we share the thread */
1957 cpu->thread = single_tcg_cpu_thread;
1958 cpu->halt_cond = single_tcg_halt_cond;
1959 cpu->thread_id = first_cpu->thread_id;
1960 cpu->can_do_io = 1;
1961 cpu->created = true;
1965 static void qemu_hax_start_vcpu(CPUState *cpu)
1967 char thread_name[VCPU_THREAD_NAME_SIZE];
1969 cpu->thread = g_malloc0(sizeof(QemuThread));
1970 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1971 qemu_cond_init(cpu->halt_cond);
1973 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1974 cpu->cpu_index);
1975 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1976 cpu, QEMU_THREAD_JOINABLE);
1977 #ifdef _WIN32
1978 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1979 #endif
1982 static void qemu_kvm_start_vcpu(CPUState *cpu)
1984 char thread_name[VCPU_THREAD_NAME_SIZE];
1986 cpu->thread = g_malloc0(sizeof(QemuThread));
1987 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1988 qemu_cond_init(cpu->halt_cond);
1989 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1990 cpu->cpu_index);
1991 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1992 cpu, QEMU_THREAD_JOINABLE);
1995 static void qemu_hvf_start_vcpu(CPUState *cpu)
1997 char thread_name[VCPU_THREAD_NAME_SIZE];
1999 /* HVF currently does not support TCG, and only runs in
2000 * unrestricted-guest mode. */
2001 assert(hvf_enabled());
2003 cpu->thread = g_malloc0(sizeof(QemuThread));
2004 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2005 qemu_cond_init(cpu->halt_cond);
2007 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2008 cpu->cpu_index);
2009 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2010 cpu, QEMU_THREAD_JOINABLE);
2013 static void qemu_whpx_start_vcpu(CPUState *cpu)
2015 char thread_name[VCPU_THREAD_NAME_SIZE];
2017 cpu->thread = g_malloc0(sizeof(QemuThread));
2018 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2019 qemu_cond_init(cpu->halt_cond);
2020 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2021 cpu->cpu_index);
2022 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2023 cpu, QEMU_THREAD_JOINABLE);
2024 #ifdef _WIN32
2025 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2026 #endif
2029 static void qemu_dummy_start_vcpu(CPUState *cpu)
2031 char thread_name[VCPU_THREAD_NAME_SIZE];
2033 cpu->thread = g_malloc0(sizeof(QemuThread));
2034 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2035 qemu_cond_init(cpu->halt_cond);
2036 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2037 cpu->cpu_index);
2038 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2039 QEMU_THREAD_JOINABLE);
2042 void qemu_init_vcpu(CPUState *cpu)
2044 cpu->nr_cores = smp_cores;
2045 cpu->nr_threads = smp_threads;
2046 cpu->stopped = true;
2048 if (!cpu->as) {
2049 /* If the target cpu hasn't set up any address spaces itself,
2050 * give it the default one.
2052 cpu->num_ases = 1;
2053 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2056 if (kvm_enabled()) {
2057 qemu_kvm_start_vcpu(cpu);
2058 } else if (hax_enabled()) {
2059 qemu_hax_start_vcpu(cpu);
2060 } else if (hvf_enabled()) {
2061 qemu_hvf_start_vcpu(cpu);
2062 } else if (tcg_enabled()) {
2063 qemu_tcg_init_vcpu(cpu);
2064 } else if (whpx_enabled()) {
2065 qemu_whpx_start_vcpu(cpu);
2066 } else {
2067 qemu_dummy_start_vcpu(cpu);
2070 while (!cpu->created) {
2071 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2075 void cpu_stop_current(void)
2077 if (current_cpu) {
2078 qemu_cpu_stop(current_cpu, true);
2082 int vm_stop(RunState state)
2084 if (qemu_in_vcpu_thread()) {
2085 qemu_system_vmstop_request_prepare();
2086 qemu_system_vmstop_request(state);
2088 * FIXME: should not return to device code in case
2089 * vm_stop() has been requested.
2091 cpu_stop_current();
2092 return 0;
2095 return do_vm_stop(state, true);
2099 * Prepare for (re)starting the VM.
2100 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2101 * running or in case of an error condition), 0 otherwise.
2103 int vm_prepare_start(void)
2105 RunState requested;
2107 qemu_vmstop_requested(&requested);
2108 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2109 return -1;
2112 /* Ensure that a STOP/RESUME pair of events is emitted if a
2113 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2114 * example, according to documentation is always followed by
2115 * the STOP event.
2117 if (runstate_is_running()) {
2118 qapi_event_send_stop();
2119 qapi_event_send_resume();
2120 return -1;
2123 /* We are sending this now, but the CPUs will be resumed shortly later */
2124 qapi_event_send_resume();
2126 replay_enable_events();
2127 cpu_enable_ticks();
2128 runstate_set(RUN_STATE_RUNNING);
2129 vm_state_notify(1, RUN_STATE_RUNNING);
2130 return 0;
/* Start the VM: resume every vCPU when vm_prepare_start() returns 0. */
void vm_start(void)
{
    if (vm_prepare_start() != 0) {
        return;
    }

    resume_all_vcpus();
}
2140 /* does a state transition even if the VM is already stopped,
2141 current state is forgotten forever */
2142 int vm_stop_force_state(RunState state)
2144 if (runstate_is_running()) {
2145 return vm_stop(state);
2146 } else {
2147 runstate_set(state);
2149 bdrv_drain_all();
2150 /* Make sure to return an error if the flush in a previous vm_stop()
2151 * failed. */
2152 return bdrv_flush_all();
2156 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
2158 /* XXX: implement xxx_cpu_list for targets that still miss it */
2159 #if defined(cpu_list)
2160 cpu_list(f, cpu_fprintf);
2161 #endif
2164 CpuInfoList *qmp_query_cpus(Error **errp)
2166 MachineState *ms = MACHINE(qdev_get_machine());
2167 MachineClass *mc = MACHINE_GET_CLASS(ms);
2168 CpuInfoList *head = NULL, *cur_item = NULL;
2169 CPUState *cpu;
2171 CPU_FOREACH(cpu) {
2172 CpuInfoList *info;
2173 #if defined(TARGET_I386)
2174 X86CPU *x86_cpu = X86_CPU(cpu);
2175 CPUX86State *env = &x86_cpu->env;
2176 #elif defined(TARGET_PPC)
2177 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
2178 CPUPPCState *env = &ppc_cpu->env;
2179 #elif defined(TARGET_SPARC)
2180 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
2181 CPUSPARCState *env = &sparc_cpu->env;
2182 #elif defined(TARGET_RISCV)
2183 RISCVCPU *riscv_cpu = RISCV_CPU(cpu);
2184 CPURISCVState *env = &riscv_cpu->env;
2185 #elif defined(TARGET_MIPS)
2186 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
2187 CPUMIPSState *env = &mips_cpu->env;
2188 #elif defined(TARGET_TRICORE)
2189 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
2190 CPUTriCoreState *env = &tricore_cpu->env;
2191 #elif defined(TARGET_S390X)
2192 S390CPU *s390_cpu = S390_CPU(cpu);
2193 CPUS390XState *env = &s390_cpu->env;
2194 #endif
2196 cpu_synchronize_state(cpu);
2198 info = g_malloc0(sizeof(*info));
2199 info->value = g_malloc0(sizeof(*info->value));
2200 info->value->CPU = cpu->cpu_index;
2201 info->value->current = (cpu == first_cpu);
2202 info->value->halted = cpu->halted;
2203 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2204 info->value->thread_id = cpu->thread_id;
2205 #if defined(TARGET_I386)
2206 info->value->arch = CPU_INFO_ARCH_X86;
2207 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
2208 #elif defined(TARGET_PPC)
2209 info->value->arch = CPU_INFO_ARCH_PPC;
2210 info->value->u.ppc.nip = env->nip;
2211 #elif defined(TARGET_SPARC)
2212 info->value->arch = CPU_INFO_ARCH_SPARC;
2213 info->value->u.q_sparc.pc = env->pc;
2214 info->value->u.q_sparc.npc = env->npc;
2215 #elif defined(TARGET_MIPS)
2216 info->value->arch = CPU_INFO_ARCH_MIPS;
2217 info->value->u.q_mips.PC = env->active_tc.PC;
2218 #elif defined(TARGET_TRICORE)
2219 info->value->arch = CPU_INFO_ARCH_TRICORE;
2220 info->value->u.tricore.PC = env->PC;
2221 #elif defined(TARGET_S390X)
2222 info->value->arch = CPU_INFO_ARCH_S390;
2223 info->value->u.s390.cpu_state = env->cpu_state;
2224 #elif defined(TARGET_RISCV)
2225 info->value->arch = CPU_INFO_ARCH_RISCV;
2226 info->value->u.riscv.pc = env->pc;
2227 #else
2228 info->value->arch = CPU_INFO_ARCH_OTHER;
2229 #endif
2230 info->value->has_props = !!mc->cpu_index_to_instance_props;
2231 if (info->value->has_props) {
2232 CpuInstanceProperties *props;
2233 props = g_malloc0(sizeof(*props));
2234 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2235 info->value->props = props;
2238 /* XXX: waiting for the qapi to support GSList */
2239 if (!cur_item) {
2240 head = cur_item = info;
2241 } else {
2242 cur_item->next = info;
2243 cur_item = info;
2247 return head;
2250 static CpuInfoArch sysemu_target_to_cpuinfo_arch(SysEmuTarget target)
2253 * The @SysEmuTarget -> @CpuInfoArch mapping below is based on the
2254 * TARGET_ARCH -> TARGET_BASE_ARCH mapping in the "configure" script.
2256 switch (target) {
2257 case SYS_EMU_TARGET_I386:
2258 case SYS_EMU_TARGET_X86_64:
2259 return CPU_INFO_ARCH_X86;
2261 case SYS_EMU_TARGET_PPC:
2262 case SYS_EMU_TARGET_PPC64:
2263 return CPU_INFO_ARCH_PPC;
2265 case SYS_EMU_TARGET_SPARC:
2266 case SYS_EMU_TARGET_SPARC64:
2267 return CPU_INFO_ARCH_SPARC;
2269 case SYS_EMU_TARGET_MIPS:
2270 case SYS_EMU_TARGET_MIPSEL:
2271 case SYS_EMU_TARGET_MIPS64:
2272 case SYS_EMU_TARGET_MIPS64EL:
2273 return CPU_INFO_ARCH_MIPS;
2275 case SYS_EMU_TARGET_TRICORE:
2276 return CPU_INFO_ARCH_TRICORE;
2278 case SYS_EMU_TARGET_S390X:
2279 return CPU_INFO_ARCH_S390;
2281 case SYS_EMU_TARGET_RISCV32:
2282 case SYS_EMU_TARGET_RISCV64:
2283 return CPU_INFO_ARCH_RISCV;
2285 default:
2286 return CPU_INFO_ARCH_OTHER;
2290 static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu)
2292 #ifdef TARGET_S390X
2293 S390CPU *s390_cpu = S390_CPU(cpu);
2294 CPUS390XState *env = &s390_cpu->env;
2296 info->cpu_state = env->cpu_state;
2297 #else
2298 abort();
2299 #endif
2303 * fast means: we NEVER interrupt vCPU threads to retrieve
2304 * information from KVM.
2306 CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
2308 MachineState *ms = MACHINE(qdev_get_machine());
2309 MachineClass *mc = MACHINE_GET_CLASS(ms);
2310 CpuInfoFastList *head = NULL, *cur_item = NULL;
2311 SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, TARGET_NAME,
2312 -1, &error_abort);
2313 CPUState *cpu;
2315 CPU_FOREACH(cpu) {
2316 CpuInfoFastList *info = g_malloc0(sizeof(*info));
2317 info->value = g_malloc0(sizeof(*info->value));
2319 info->value->cpu_index = cpu->cpu_index;
2320 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2321 info->value->thread_id = cpu->thread_id;
2323 info->value->has_props = !!mc->cpu_index_to_instance_props;
2324 if (info->value->has_props) {
2325 CpuInstanceProperties *props;
2326 props = g_malloc0(sizeof(*props));
2327 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2328 info->value->props = props;
2331 info->value->arch = sysemu_target_to_cpuinfo_arch(target);
2332 info->value->target = target;
2333 if (target == SYS_EMU_TARGET_S390X) {
2334 cpustate_to_cpuinfo_s390(&info->value->u.s390x, cpu);
2337 if (!cur_item) {
2338 head = cur_item = info;
2339 } else {
2340 cur_item->next = info;
2341 cur_item = info;
2345 return head;
2348 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2349 bool has_cpu, int64_t cpu_index, Error **errp)
2351 FILE *f;
2352 uint32_t l;
2353 CPUState *cpu;
2354 uint8_t buf[1024];
2355 int64_t orig_addr = addr, orig_size = size;
2357 if (!has_cpu) {
2358 cpu_index = 0;
2361 cpu = qemu_get_cpu(cpu_index);
2362 if (cpu == NULL) {
2363 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2364 "a CPU number");
2365 return;
2368 f = fopen(filename, "wb");
2369 if (!f) {
2370 error_setg_file_open(errp, errno, filename);
2371 return;
2374 while (size != 0) {
2375 l = sizeof(buf);
2376 if (l > size)
2377 l = size;
2378 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2379 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2380 " specified", orig_addr, orig_size);
2381 goto exit;
2383 if (fwrite(buf, 1, l, f) != l) {
2384 error_setg(errp, QERR_IO_ERROR);
2385 goto exit;
2387 addr += l;
2388 size -= l;
2391 exit:
2392 fclose(f);
2395 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2396 Error **errp)
2398 FILE *f;
2399 uint32_t l;
2400 uint8_t buf[1024];
2402 f = fopen(filename, "wb");
2403 if (!f) {
2404 error_setg_file_open(errp, errno, filename);
2405 return;
2408 while (size != 0) {
2409 l = sizeof(buf);
2410 if (l > size)
2411 l = size;
2412 cpu_physical_memory_read(addr, buf, l);
2413 if (fwrite(buf, 1, l, f) != l) {
2414 error_setg(errp, QERR_IO_ERROR);
2415 goto exit;
2417 addr += l;
2418 size -= l;
2421 exit:
2422 fclose(f);
2425 void qmp_inject_nmi(Error **errp)
2427 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2430 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2432 if (!use_icount) {
2433 return;
2436 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
2437 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2438 if (icount_align_option) {
2439 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
2440 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
2441 } else {
2442 cpu_fprintf(f, "Max guest delay NA\n");
2443 cpu_fprintf(f, "Max guest advance NA\n");