aio: push aio_context_acquire/release down to dispatching
[qemu/ar7.git] / cpus.c
blob0bcb5b50b6c2cb884ab1bf8900fde9f4e3cfe966
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "monitor/monitor.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qemu/error-report.h"
32 #include "sysemu/sysemu.h"
33 #include "sysemu/block-backend.h"
34 #include "exec/gdbstub.h"
35 #include "sysemu/dma.h"
36 #include "sysemu/hw_accel.h"
37 #include "sysemu/kvm.h"
38 #include "sysemu/hax.h"
39 #include "qmp-commands.h"
40 #include "exec/exec-all.h"
42 #include "qemu/thread.h"
43 #include "sysemu/cpus.h"
44 #include "sysemu/qtest.h"
45 #include "qemu/main-loop.h"
46 #include "qemu/bitmap.h"
47 #include "qemu/seqlock.h"
48 #include "qapi-event.h"
49 #include "hw/nmi.h"
50 #include "sysemu/replay.h"
52 #ifndef _WIN32
53 #include "qemu/compatfd.h"
54 #endif
56 #ifdef CONFIG_LINUX
58 #include <sys/prctl.h>
60 #ifndef PR_MCE_KILL
61 #define PR_MCE_KILL 33
62 #endif
64 #ifndef PR_MCE_KILL_SET
65 #define PR_MCE_KILL_SET 1
66 #endif
68 #ifndef PR_MCE_KILL_EARLY
69 #define PR_MCE_KILL_EARLY 1
70 #endif
72 #endif /* CONFIG_LINUX */
74 int64_t max_delay;
75 int64_t max_advance;
77 /* vcpu throttling controls */
78 static QEMUTimer *throttle_timer;
79 static unsigned int throttle_percentage;
81 #define CPU_THROTTLE_PCT_MIN 1
82 #define CPU_THROTTLE_PCT_MAX 99
83 #define CPU_THROTTLE_TIMESLICE_NS 10000000
85 bool cpu_is_stopped(CPUState *cpu)
87 return cpu->stopped || !runstate_is_running();
90 static bool cpu_thread_is_idle(CPUState *cpu)
92 if (cpu->stop || cpu->queued_work_first) {
93 return false;
95 if (cpu_is_stopped(cpu)) {
96 return true;
98 if (!cpu->halted || cpu_has_work(cpu) ||
99 kvm_halt_in_kernel()) {
100 return false;
102 return true;
105 static bool all_cpu_threads_idle(void)
107 CPUState *cpu;
109 CPU_FOREACH(cpu) {
110 if (!cpu_thread_is_idle(cpu)) {
111 return false;
114 return true;
117 /***********************************************************/
118 /* guest cycle counter */
120 /* Protected by TimersState seqlock */
122 static bool icount_sleep = true;
123 static int64_t vm_clock_warp_start = -1;
124 /* Conversion factor from emulated instructions to virtual clock ticks. */
125 static int icount_time_shift;
126 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
127 #define MAX_ICOUNT_SHIFT 10
129 static QEMUTimer *icount_rt_timer;
130 static QEMUTimer *icount_vm_timer;
131 static QEMUTimer *icount_warp_timer;
133 typedef struct TimersState {
134 /* Protected by BQL. */
135 int64_t cpu_ticks_prev;
136 int64_t cpu_ticks_offset;
138 /* cpu_clock_offset can be read out of BQL, so protect it with
139 * this lock.
141 QemuSeqLock vm_clock_seqlock;
142 int64_t cpu_clock_offset;
143 int32_t cpu_ticks_enabled;
144 int64_t dummy;
146 /* Compensate for varying guest execution speed. */
147 int64_t qemu_icount_bias;
148 /* Only written by TCG thread */
149 int64_t qemu_icount;
150 } TimersState;
152 static TimersState timers_state;
154 int64_t cpu_get_icount_raw(void)
156 int64_t icount;
157 CPUState *cpu = current_cpu;
159 icount = timers_state.qemu_icount;
160 if (cpu) {
161 if (!cpu->can_do_io) {
162 fprintf(stderr, "Bad icount read\n");
163 exit(1);
165 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
167 return icount;
170 /* Return the virtual CPU time, based on the instruction counter. */
171 static int64_t cpu_get_icount_locked(void)
173 int64_t icount = cpu_get_icount_raw();
174 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
177 int64_t cpu_get_icount(void)
179 int64_t icount;
180 unsigned start;
182 do {
183 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
184 icount = cpu_get_icount_locked();
185 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
187 return icount;
190 int64_t cpu_icount_to_ns(int64_t icount)
192 return icount << icount_time_shift;
195 /* return the time elapsed in VM between vm_start and vm_stop. Unless
196 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
197 * counter.
199 * Caller must hold the BQL
201 int64_t cpu_get_ticks(void)
203 int64_t ticks;
205 if (use_icount) {
206 return cpu_get_icount();
209 ticks = timers_state.cpu_ticks_offset;
210 if (timers_state.cpu_ticks_enabled) {
211 ticks += cpu_get_host_ticks();
214 if (timers_state.cpu_ticks_prev > ticks) {
215 /* Note: non increasing ticks may happen if the host uses
216 software suspend */
217 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
218 ticks = timers_state.cpu_ticks_prev;
221 timers_state.cpu_ticks_prev = ticks;
222 return ticks;
225 static int64_t cpu_get_clock_locked(void)
227 int64_t time;
229 time = timers_state.cpu_clock_offset;
230 if (timers_state.cpu_ticks_enabled) {
231 time += get_clock();
234 return time;
237 /* Return the monotonic time elapsed in VM, i.e.,
238 * the time between vm_start and vm_stop
240 int64_t cpu_get_clock(void)
242 int64_t ti;
243 unsigned start;
245 do {
246 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
247 ti = cpu_get_clock_locked();
248 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
250 return ti;
253 /* enable cpu_get_ticks()
254 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
256 void cpu_enable_ticks(void)
258 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
259 seqlock_write_begin(&timers_state.vm_clock_seqlock);
260 if (!timers_state.cpu_ticks_enabled) {
261 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
262 timers_state.cpu_clock_offset -= get_clock();
263 timers_state.cpu_ticks_enabled = 1;
265 seqlock_write_end(&timers_state.vm_clock_seqlock);
268 /* disable cpu_get_ticks() : the clock is stopped. You must not call
269 * cpu_get_ticks() after that.
270 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
272 void cpu_disable_ticks(void)
274 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
275 seqlock_write_begin(&timers_state.vm_clock_seqlock);
276 if (timers_state.cpu_ticks_enabled) {
277 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
278 timers_state.cpu_clock_offset = cpu_get_clock_locked();
279 timers_state.cpu_ticks_enabled = 0;
281 seqlock_write_end(&timers_state.vm_clock_seqlock);
284 /* Correlation between real and virtual time is always going to be
285 fairly approximate, so ignore small variation.
286 When the guest is idle real and virtual time will be aligned in
287 the IO wait loop. */
288 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
290 static void icount_adjust(void)
292 int64_t cur_time;
293 int64_t cur_icount;
294 int64_t delta;
296 /* Protected by TimersState mutex. */
297 static int64_t last_delta;
299 /* If the VM is not running, then do nothing. */
300 if (!runstate_is_running()) {
301 return;
304 seqlock_write_begin(&timers_state.vm_clock_seqlock);
305 cur_time = cpu_get_clock_locked();
306 cur_icount = cpu_get_icount_locked();
308 delta = cur_icount - cur_time;
309 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
310 if (delta > 0
311 && last_delta + ICOUNT_WOBBLE < delta * 2
312 && icount_time_shift > 0) {
313 /* The guest is getting too far ahead. Slow time down. */
314 icount_time_shift--;
316 if (delta < 0
317 && last_delta - ICOUNT_WOBBLE > delta * 2
318 && icount_time_shift < MAX_ICOUNT_SHIFT) {
319 /* The guest is getting too far behind. Speed time up. */
320 icount_time_shift++;
322 last_delta = delta;
323 timers_state.qemu_icount_bias = cur_icount
324 - (timers_state.qemu_icount << icount_time_shift);
325 seqlock_write_end(&timers_state.vm_clock_seqlock);
328 static void icount_adjust_rt(void *opaque)
330 timer_mod(icount_rt_timer,
331 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
332 icount_adjust();
335 static void icount_adjust_vm(void *opaque)
337 timer_mod(icount_vm_timer,
338 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
339 NANOSECONDS_PER_SECOND / 10);
340 icount_adjust();
343 static int64_t qemu_icount_round(int64_t count)
345 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
348 static void icount_warp_rt(void)
350 unsigned seq;
351 int64_t warp_start;
353 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
354 * changes from -1 to another value, so the race here is okay.
356 do {
357 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
358 warp_start = vm_clock_warp_start;
359 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
361 if (warp_start == -1) {
362 return;
365 seqlock_write_begin(&timers_state.vm_clock_seqlock);
366 if (runstate_is_running()) {
367 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
368 cpu_get_clock_locked());
369 int64_t warp_delta;
371 warp_delta = clock - vm_clock_warp_start;
372 if (use_icount == 2) {
374 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
375 * far ahead of real time.
377 int64_t cur_icount = cpu_get_icount_locked();
378 int64_t delta = clock - cur_icount;
379 warp_delta = MIN(warp_delta, delta);
381 timers_state.qemu_icount_bias += warp_delta;
383 vm_clock_warp_start = -1;
384 seqlock_write_end(&timers_state.vm_clock_seqlock);
386 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
387 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
/* Warp timer callback. */
static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
399 void qtest_clock_warp(int64_t dest)
401 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
402 AioContext *aio_context;
403 assert(qtest_enabled());
404 aio_context = qemu_get_aio_context();
405 while (clock < dest) {
406 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
407 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
409 seqlock_write_begin(&timers_state.vm_clock_seqlock);
410 timers_state.qemu_icount_bias += warp;
411 seqlock_write_end(&timers_state.vm_clock_seqlock);
413 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
414 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
415 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
417 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
420 void qemu_start_warp_timer(void)
422 int64_t clock;
423 int64_t deadline;
425 if (!use_icount) {
426 return;
429 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
430 * do not fire, so computing the deadline does not make sense.
432 if (!runstate_is_running()) {
433 return;
436 /* warp clock deterministically in record/replay mode */
437 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
438 return;
441 if (!all_cpu_threads_idle()) {
442 return;
445 if (qtest_enabled()) {
446 /* When testing, qtest commands advance icount. */
447 return;
450 /* We want to use the earliest deadline from ALL vm_clocks */
451 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
452 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
453 if (deadline < 0) {
454 static bool notified;
455 if (!icount_sleep && !notified) {
456 error_report("WARNING: icount sleep disabled and no active timers");
457 notified = true;
459 return;
462 if (deadline > 0) {
464 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
465 * sleep. Otherwise, the CPU might be waiting for a future timer
466 * interrupt to wake it up, but the interrupt never comes because
467 * the vCPU isn't running any insns and thus doesn't advance the
468 * QEMU_CLOCK_VIRTUAL.
470 if (!icount_sleep) {
472 * We never let VCPUs sleep in no sleep icount mode.
473 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
474 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
475 * It is useful when we want a deterministic execution time,
476 * isolated from host latencies.
478 seqlock_write_begin(&timers_state.vm_clock_seqlock);
479 timers_state.qemu_icount_bias += deadline;
480 seqlock_write_end(&timers_state.vm_clock_seqlock);
481 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
482 } else {
484 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
485 * "real" time, (related to the time left until the next event) has
486 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
487 * This avoids that the warps are visible externally; for example,
488 * you will not be sending network packets continuously instead of
489 * every 100ms.
491 seqlock_write_begin(&timers_state.vm_clock_seqlock);
492 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
493 vm_clock_warp_start = clock;
495 seqlock_write_end(&timers_state.vm_clock_seqlock);
496 timer_mod_anticipate(icount_warp_timer, clock + deadline);
498 } else if (deadline == 0) {
499 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
503 static void qemu_account_warp_timer(void)
505 if (!use_icount || !icount_sleep) {
506 return;
509 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
510 * do not fire, so computing the deadline does not make sense.
512 if (!runstate_is_running()) {
513 return;
516 /* warp clock deterministically in record/replay mode */
517 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
518 return;
521 timer_del(icount_warp_timer);
522 icount_warp_rt();
525 static bool icount_state_needed(void *opaque)
527 return use_icount;
531 * This is a subsection for icount migration.
533 static const VMStateDescription icount_vmstate_timers = {
534 .name = "timer/icount",
535 .version_id = 1,
536 .minimum_version_id = 1,
537 .needed = icount_state_needed,
538 .fields = (VMStateField[]) {
539 VMSTATE_INT64(qemu_icount_bias, TimersState),
540 VMSTATE_INT64(qemu_icount, TimersState),
541 VMSTATE_END_OF_LIST()
545 static const VMStateDescription vmstate_timers = {
546 .name = "timer",
547 .version_id = 2,
548 .minimum_version_id = 1,
549 .fields = (VMStateField[]) {
550 VMSTATE_INT64(cpu_ticks_offset, TimersState),
551 VMSTATE_INT64(dummy, TimersState),
552 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
553 VMSTATE_END_OF_LIST()
555 .subsections = (const VMStateDescription*[]) {
556 &icount_vmstate_timers,
557 NULL
561 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
563 double pct;
564 double throttle_ratio;
565 long sleeptime_ns;
567 if (!cpu_throttle_get_percentage()) {
568 return;
571 pct = (double)cpu_throttle_get_percentage()/100;
572 throttle_ratio = pct / (1 - pct);
573 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
575 qemu_mutex_unlock_iothread();
576 atomic_set(&cpu->throttle_thread_scheduled, 0);
577 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
578 qemu_mutex_lock_iothread();
581 static void cpu_throttle_timer_tick(void *opaque)
583 CPUState *cpu;
584 double pct;
586 /* Stop the timer if needed */
587 if (!cpu_throttle_get_percentage()) {
588 return;
590 CPU_FOREACH(cpu) {
591 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
592 async_run_on_cpu(cpu, cpu_throttle_thread,
593 RUN_ON_CPU_NULL);
597 pct = (double)cpu_throttle_get_percentage()/100;
598 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
599 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
602 void cpu_throttle_set(int new_throttle_pct)
604 /* Ensure throttle percentage is within valid range */
605 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
606 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
608 atomic_set(&throttle_percentage, new_throttle_pct);
610 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
611 CPU_THROTTLE_TIMESLICE_NS);
614 void cpu_throttle_stop(void)
616 atomic_set(&throttle_percentage, 0);
/* True while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
624 int cpu_throttle_get_percentage(void)
626 return atomic_read(&throttle_percentage);
629 void cpu_ticks_init(void)
631 seqlock_init(&timers_state.vm_clock_seqlock);
632 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
633 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
634 cpu_throttle_timer_tick, NULL);
637 void configure_icount(QemuOpts *opts, Error **errp)
639 const char *option;
640 char *rem_str = NULL;
642 option = qemu_opt_get(opts, "shift");
643 if (!option) {
644 if (qemu_opt_get(opts, "align") != NULL) {
645 error_setg(errp, "Please specify shift option when using align");
647 return;
650 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
651 if (icount_sleep) {
652 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
653 icount_timer_cb, NULL);
656 icount_align_option = qemu_opt_get_bool(opts, "align", false);
658 if (icount_align_option && !icount_sleep) {
659 error_setg(errp, "align=on and sleep=off are incompatible");
661 if (strcmp(option, "auto") != 0) {
662 errno = 0;
663 icount_time_shift = strtol(option, &rem_str, 0);
664 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
665 error_setg(errp, "icount: Invalid shift value");
667 use_icount = 1;
668 return;
669 } else if (icount_align_option) {
670 error_setg(errp, "shift=auto and align=on are incompatible");
671 } else if (!icount_sleep) {
672 error_setg(errp, "shift=auto and sleep=off are incompatible");
675 use_icount = 2;
677 /* 125MIPS seems a reasonable initial guess at the guest speed.
678 It will be corrected fairly quickly anyway. */
679 icount_time_shift = 3;
681 /* Have both realtime and virtual time triggers for speed adjustment.
682 The realtime trigger catches emulated time passing too slowly,
683 the virtual time trigger catches emulated time passing too fast.
684 Realtime triggers occur even when idle, so use them less frequently
685 than VM triggers. */
686 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
687 icount_adjust_rt, NULL);
688 timer_mod(icount_rt_timer,
689 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
690 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
691 icount_adjust_vm, NULL);
692 timer_mod(icount_vm_timer,
693 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
694 NANOSECONDS_PER_SECOND / 10);
697 /***********************************************************/
698 void hw_error(const char *fmt, ...)
700 va_list ap;
701 CPUState *cpu;
703 va_start(ap, fmt);
704 fprintf(stderr, "qemu: hardware error: ");
705 vfprintf(stderr, fmt, ap);
706 fprintf(stderr, "\n");
707 CPU_FOREACH(cpu) {
708 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
709 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
711 va_end(ap);
712 abort();
715 void cpu_synchronize_all_states(void)
717 CPUState *cpu;
719 CPU_FOREACH(cpu) {
720 cpu_synchronize_state(cpu);
724 void cpu_synchronize_all_post_reset(void)
726 CPUState *cpu;
728 CPU_FOREACH(cpu) {
729 cpu_synchronize_post_reset(cpu);
733 void cpu_synchronize_all_post_init(void)
735 CPUState *cpu;
737 CPU_FOREACH(cpu) {
738 cpu_synchronize_post_init(cpu);
742 static int do_vm_stop(RunState state)
744 int ret = 0;
746 if (runstate_is_running()) {
747 cpu_disable_ticks();
748 pause_all_vcpus();
749 runstate_set(state);
750 vm_state_notify(0, state);
751 qapi_event_send_stop(&error_abort);
754 bdrv_drain_all();
755 replay_disable_events();
756 ret = bdrv_flush_all();
758 return ret;
761 static bool cpu_can_run(CPUState *cpu)
763 if (cpu->stop) {
764 return false;
766 if (cpu_is_stopped(cpu)) {
767 return false;
769 return true;
772 static void cpu_handle_guest_debug(CPUState *cpu)
774 gdb_set_stop_cpu(cpu);
775 qemu_system_debug_request();
776 cpu->stopped = true;
779 #ifdef CONFIG_LINUX
780 static void sigbus_reraise(void)
782 sigset_t set;
783 struct sigaction action;
785 memset(&action, 0, sizeof(action));
786 action.sa_handler = SIG_DFL;
787 if (!sigaction(SIGBUS, &action, NULL)) {
788 raise(SIGBUS);
789 sigemptyset(&set);
790 sigaddset(&set, SIGBUS);
791 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
793 perror("Failed to re-raise SIGBUS!\n");
794 abort();
797 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
798 void *ctx)
800 if (kvm_on_sigbus(siginfo->ssi_code,
801 (void *)(intptr_t)siginfo->ssi_addr)) {
802 sigbus_reraise();
806 static void qemu_init_sigbus(void)
808 struct sigaction action;
810 memset(&action, 0, sizeof(action));
811 action.sa_flags = SA_SIGINFO;
812 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
813 sigaction(SIGBUS, &action, NULL);
815 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
818 static void qemu_kvm_eat_signals(CPUState *cpu)
820 struct timespec ts = { 0, 0 };
821 siginfo_t siginfo;
822 sigset_t waitset;
823 sigset_t chkset;
824 int r;
826 sigemptyset(&waitset);
827 sigaddset(&waitset, SIG_IPI);
828 sigaddset(&waitset, SIGBUS);
830 do {
831 r = sigtimedwait(&waitset, &siginfo, &ts);
832 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
833 perror("sigtimedwait");
834 exit(1);
837 switch (r) {
838 case SIGBUS:
839 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
840 sigbus_reraise();
842 break;
843 default:
844 break;
847 r = sigpending(&chkset);
848 if (r == -1) {
849 perror("sigpending");
850 exit(1);
852 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
855 #else /* !CONFIG_LINUX */
/* Non-Linux build: no SIGBUS setup. */
static void qemu_init_sigbus(void)
{
}
861 static void qemu_kvm_eat_signals(CPUState *cpu)
864 #endif /* !CONFIG_LINUX */
866 #ifndef _WIN32
/* Empty handler installed for SIG_IPI by qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
}
871 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
873 int r;
874 sigset_t set;
875 struct sigaction sigact;
877 memset(&sigact, 0, sizeof(sigact));
878 sigact.sa_handler = dummy_signal;
879 sigaction(SIG_IPI, &sigact, NULL);
881 pthread_sigmask(SIG_BLOCK, NULL, &set);
882 sigdelset(&set, SIG_IPI);
883 sigdelset(&set, SIGBUS);
884 r = kvm_set_signal_mask(cpu, &set);
885 if (r) {
886 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
887 exit(1);
891 #else /* _WIN32 */
892 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
894 abort();
896 #endif /* _WIN32 */
898 static QemuMutex qemu_global_mutex;
899 static QemuCond qemu_io_proceeded_cond;
900 static unsigned iothread_requesting_mutex;
902 static QemuThread io_thread;
904 /* cpu creation */
905 static QemuCond qemu_cpu_cond;
906 /* system init */
907 static QemuCond qemu_pause_cond;
909 void qemu_init_cpu_loop(void)
911 qemu_init_sigbus();
912 qemu_cond_init(&qemu_cpu_cond);
913 qemu_cond_init(&qemu_pause_cond);
914 qemu_cond_init(&qemu_io_proceeded_cond);
915 qemu_mutex_init(&qemu_global_mutex);
917 qemu_thread_get_self(&io_thread);
920 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
922 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
925 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
927 if (kvm_destroy_vcpu(cpu) < 0) {
928 error_report("kvm_destroy_vcpu failed");
929 exit(EXIT_FAILURE);
933 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
937 static void qemu_wait_io_event_common(CPUState *cpu)
939 if (cpu->stop) {
940 cpu->stop = false;
941 cpu->stopped = true;
942 qemu_cond_broadcast(&qemu_pause_cond);
944 process_queued_cpu_work(cpu);
945 cpu->thread_kicked = false;
948 static void qemu_tcg_wait_io_event(CPUState *cpu)
950 while (all_cpu_threads_idle()) {
951 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
954 while (iothread_requesting_mutex) {
955 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
958 CPU_FOREACH(cpu) {
959 qemu_wait_io_event_common(cpu);
963 static void qemu_kvm_wait_io_event(CPUState *cpu)
965 while (cpu_thread_is_idle(cpu)) {
966 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
969 qemu_kvm_eat_signals(cpu);
970 qemu_wait_io_event_common(cpu);
973 static void *qemu_kvm_cpu_thread_fn(void *arg)
975 CPUState *cpu = arg;
976 int r;
978 rcu_register_thread();
980 qemu_mutex_lock_iothread();
981 qemu_thread_get_self(cpu->thread);
982 cpu->thread_id = qemu_get_thread_id();
983 cpu->can_do_io = 1;
984 current_cpu = cpu;
986 r = kvm_init_vcpu(cpu);
987 if (r < 0) {
988 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
989 exit(1);
992 qemu_kvm_init_cpu_signals(cpu);
994 /* signal CPU creation */
995 cpu->created = true;
996 qemu_cond_signal(&qemu_cpu_cond);
998 do {
999 if (cpu_can_run(cpu)) {
1000 r = kvm_cpu_exec(cpu);
1001 if (r == EXCP_DEBUG) {
1002 cpu_handle_guest_debug(cpu);
1005 qemu_kvm_wait_io_event(cpu);
1006 } while (!cpu->unplug || cpu_can_run(cpu));
1008 qemu_kvm_destroy_vcpu(cpu);
1009 cpu->created = false;
1010 qemu_cond_signal(&qemu_cpu_cond);
1011 qemu_mutex_unlock_iothread();
1012 return NULL;
1015 static void *qemu_dummy_cpu_thread_fn(void *arg)
1017 #ifdef _WIN32
1018 fprintf(stderr, "qtest is not supported under Windows\n");
1019 exit(1);
1020 #else
1021 CPUState *cpu = arg;
1022 sigset_t waitset;
1023 int r;
1025 rcu_register_thread();
1027 qemu_mutex_lock_iothread();
1028 qemu_thread_get_self(cpu->thread);
1029 cpu->thread_id = qemu_get_thread_id();
1030 cpu->can_do_io = 1;
1032 sigemptyset(&waitset);
1033 sigaddset(&waitset, SIG_IPI);
1035 /* signal CPU creation */
1036 cpu->created = true;
1037 qemu_cond_signal(&qemu_cpu_cond);
1039 current_cpu = cpu;
1040 while (1) {
1041 current_cpu = NULL;
1042 qemu_mutex_unlock_iothread();
1043 do {
1044 int sig;
1045 r = sigwait(&waitset, &sig);
1046 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1047 if (r == -1) {
1048 perror("sigwait");
1049 exit(1);
1051 qemu_mutex_lock_iothread();
1052 current_cpu = cpu;
1053 qemu_wait_io_event_common(cpu);
1056 return NULL;
1057 #endif
1060 static int64_t tcg_get_icount_limit(void)
1062 int64_t deadline;
1064 if (replay_mode != REPLAY_MODE_PLAY) {
1065 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1067 /* Maintain prior (possibly buggy) behaviour where if no deadline
1068 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1069 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1070 * nanoseconds.
1072 if ((deadline < 0) || (deadline > INT32_MAX)) {
1073 deadline = INT32_MAX;
1076 return qemu_icount_round(deadline);
1077 } else {
1078 return replay_get_instructions();
1082 static void handle_icount_deadline(void)
1084 if (use_icount) {
1085 int64_t deadline =
1086 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1088 if (deadline == 0) {
1089 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1094 static int tcg_cpu_exec(CPUState *cpu)
1096 int ret;
1097 #ifdef CONFIG_PROFILER
1098 int64_t ti;
1099 #endif
1101 #ifdef CONFIG_PROFILER
1102 ti = profile_getclock();
1103 #endif
1104 if (use_icount) {
1105 int64_t count;
1106 int decr;
1107 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1108 + cpu->icount_extra);
1109 cpu->icount_decr.u16.low = 0;
1110 cpu->icount_extra = 0;
1111 count = tcg_get_icount_limit();
1112 timers_state.qemu_icount += count;
1113 decr = (count > 0xffff) ? 0xffff : count;
1114 count -= decr;
1115 cpu->icount_decr.u16.low = decr;
1116 cpu->icount_extra = count;
1118 cpu_exec_start(cpu);
1119 ret = cpu_exec(cpu);
1120 cpu_exec_end(cpu);
1121 #ifdef CONFIG_PROFILER
1122 tcg_time += profile_getclock() - ti;
1123 #endif
1124 if (use_icount) {
1125 /* Fold pending instructions back into the
1126 instruction counter, and clear the interrupt flag. */
1127 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1128 + cpu->icount_extra);
1129 cpu->icount_decr.u32 = 0;
1130 cpu->icount_extra = 0;
1131 replay_account_executed_instructions();
1133 return ret;
1136 /* Destroy any remaining vCPUs which have been unplugged and have
1137 * finished running
1139 static void deal_with_unplugged_cpus(void)
1141 CPUState *cpu;
1143 CPU_FOREACH(cpu) {
1144 if (cpu->unplug && !cpu_can_run(cpu)) {
1145 qemu_tcg_destroy_vcpu(cpu);
1146 cpu->created = false;
1147 qemu_cond_signal(&qemu_cpu_cond);
1148 break;
1153 static void *qemu_tcg_cpu_thread_fn(void *arg)
1155 CPUState *cpu = arg;
1157 rcu_register_thread();
1159 qemu_mutex_lock_iothread();
1160 qemu_thread_get_self(cpu->thread);
1162 CPU_FOREACH(cpu) {
1163 cpu->thread_id = qemu_get_thread_id();
1164 cpu->created = true;
1165 cpu->can_do_io = 1;
1167 qemu_cond_signal(&qemu_cpu_cond);
1169 /* wait for initial kick-off after machine start */
1170 while (first_cpu->stopped) {
1171 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1173 /* process any pending work */
1174 CPU_FOREACH(cpu) {
1175 qemu_wait_io_event_common(cpu);
1179 /* process any pending work */
1180 atomic_mb_set(&exit_request, 1);
1182 cpu = first_cpu;
1184 while (1) {
1185 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1186 qemu_account_warp_timer();
1188 if (!cpu) {
1189 cpu = first_cpu;
1192 for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
1194 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1195 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1197 if (cpu_can_run(cpu)) {
1198 int r;
1199 r = tcg_cpu_exec(cpu);
1200 if (r == EXCP_DEBUG) {
1201 cpu_handle_guest_debug(cpu);
1202 break;
1204 } else if (cpu->stop || cpu->stopped) {
1205 if (cpu->unplug) {
1206 cpu = CPU_NEXT(cpu);
1208 break;
1211 } /* for cpu.. */
1213 /* Pairs with smp_wmb in qemu_cpu_kick. */
1214 atomic_mb_set(&exit_request, 0);
1216 handle_icount_deadline();
1218 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
1219 deal_with_unplugged_cpus();
1222 return NULL;
1225 static void *qemu_hax_cpu_thread_fn(void *arg)
1227 CPUState *cpu = arg;
1228 int r;
1229 qemu_thread_get_self(cpu->thread);
1230 qemu_mutex_lock(&qemu_global_mutex);
1232 cpu->thread_id = qemu_get_thread_id();
1233 cpu->created = true;
1234 cpu->halted = 0;
1235 current_cpu = cpu;
1237 hax_init_vcpu(cpu);
1238 qemu_cond_signal(&qemu_cpu_cond);
1240 while (1) {
1241 if (cpu_can_run(cpu)) {
1242 r = hax_smp_cpu_exec(cpu);
1243 if (r == EXCP_DEBUG) {
1244 cpu_handle_guest_debug(cpu);
1248 while (cpu_thread_is_idle(cpu)) {
1249 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1251 #ifdef _WIN32
1252 SleepEx(0, TRUE);
1253 #endif
1254 qemu_wait_io_event_common(cpu);
1256 return NULL;
1259 #ifdef _WIN32
1260 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1263 #endif
1265 static void qemu_cpu_kick_thread(CPUState *cpu)
1267 #ifndef _WIN32
1268 int err;
1270 if (cpu->thread_kicked) {
1271 return;
1273 cpu->thread_kicked = true;
1274 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1275 if (err) {
1276 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1277 exit(1);
1279 #else /* _WIN32 */
1280 if (!qemu_cpu_is_self(cpu)) {
1281 if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1282 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1283 __func__, GetLastError());
1284 exit(1);
1287 #endif
1290 static void qemu_cpu_kick_no_halt(void)
1292 CPUState *cpu;
1293 /* Ensure whatever caused the exit has reached the CPU threads before
1294 * writing exit_request.
1296 atomic_mb_set(&exit_request, 1);
1297 cpu = atomic_mb_read(&tcg_current_cpu);
1298 if (cpu) {
1299 cpu_exit(cpu);
1303 void qemu_cpu_kick(CPUState *cpu)
1305 qemu_cond_broadcast(cpu->halt_cond);
1306 if (tcg_enabled()) {
1307 qemu_cpu_kick_no_halt();
1308 } else {
1309 if (hax_enabled()) {
1311 * FIXME: race condition with the exit_request check in
1312 * hax_vcpu_hax_exec
1314 cpu->exit_request = 1;
1316 qemu_cpu_kick_thread(cpu);
1320 void qemu_cpu_kick_self(void)
1322 assert(current_cpu);
1323 qemu_cpu_kick_thread(current_cpu);
1326 bool qemu_cpu_is_self(CPUState *cpu)
1328 return qemu_thread_is_self(cpu->thread);
1331 bool qemu_in_vcpu_thread(void)
1333 return current_cpu && qemu_cpu_is_self(current_cpu);
/*
 * Per-thread flag: set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread().
 */
static __thread bool iothread_locked = false;

/*
 * Report whether the calling thread has recorded itself as holding the
 * iothread lock.
 */
bool qemu_mutex_iothread_locked(void)
{
    bool held = iothread_locked;

    return held;
}
1343 void qemu_mutex_lock_iothread(void)
1345 atomic_inc(&iothread_requesting_mutex);
1346 /* In the simple case there is no need to bump the VCPU thread out of
1347 * TCG code execution.
1349 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1350 !first_cpu || !first_cpu->created) {
1351 qemu_mutex_lock(&qemu_global_mutex);
1352 atomic_dec(&iothread_requesting_mutex);
1353 } else {
1354 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1355 qemu_cpu_kick_no_halt();
1356 qemu_mutex_lock(&qemu_global_mutex);
1358 atomic_dec(&iothread_requesting_mutex);
1359 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1361 iothread_locked = true;
1364 void qemu_mutex_unlock_iothread(void)
1366 iothread_locked = false;
1367 qemu_mutex_unlock(&qemu_global_mutex);
1370 static bool all_vcpus_paused(void)
1372 CPUState *cpu;
1374 CPU_FOREACH(cpu) {
1375 if (!cpu->stopped) {
1376 return false;
1380 return true;
1383 void pause_all_vcpus(void)
1385 CPUState *cpu;
1387 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1388 CPU_FOREACH(cpu) {
1389 cpu->stop = true;
1390 qemu_cpu_kick(cpu);
1393 if (qemu_in_vcpu_thread()) {
1394 cpu_stop_current();
1395 if (!kvm_enabled()) {
1396 CPU_FOREACH(cpu) {
1397 cpu->stop = false;
1398 cpu->stopped = true;
1400 return;
1404 while (!all_vcpus_paused()) {
1405 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1406 CPU_FOREACH(cpu) {
1407 qemu_cpu_kick(cpu);
1412 void cpu_resume(CPUState *cpu)
1414 cpu->stop = false;
1415 cpu->stopped = false;
1416 qemu_cpu_kick(cpu);
1419 void resume_all_vcpus(void)
1421 CPUState *cpu;
1423 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1424 CPU_FOREACH(cpu) {
1425 cpu_resume(cpu);
1429 void cpu_remove(CPUState *cpu)
1431 cpu->stop = true;
1432 cpu->unplug = true;
1433 qemu_cpu_kick(cpu);
1436 void cpu_remove_sync(CPUState *cpu)
1438 cpu_remove(cpu);
1439 while (cpu->created) {
1440 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1444 /* For temporary buffers for forming a name */
1445 #define VCPU_THREAD_NAME_SIZE 16
1447 static void qemu_tcg_init_vcpu(CPUState *cpu)
1449 char thread_name[VCPU_THREAD_NAME_SIZE];
1450 static QemuCond *tcg_halt_cond;
1451 static QemuThread *tcg_cpu_thread;
1453 /* share a single thread for all cpus with TCG */
1454 if (!tcg_cpu_thread) {
1455 cpu->thread = g_malloc0(sizeof(QemuThread));
1456 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1457 qemu_cond_init(cpu->halt_cond);
1458 tcg_halt_cond = cpu->halt_cond;
1459 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1460 cpu->cpu_index);
1461 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1462 cpu, QEMU_THREAD_JOINABLE);
1463 #ifdef _WIN32
1464 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1465 #endif
1466 while (!cpu->created) {
1467 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1469 tcg_cpu_thread = cpu->thread;
1470 } else {
1471 cpu->thread = tcg_cpu_thread;
1472 cpu->halt_cond = tcg_halt_cond;
1476 static void qemu_hax_start_vcpu(CPUState *cpu)
1478 char thread_name[VCPU_THREAD_NAME_SIZE];
1480 cpu->thread = g_malloc0(sizeof(QemuThread));
1481 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1482 qemu_cond_init(cpu->halt_cond);
1484 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1485 cpu->cpu_index);
1486 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1487 cpu, QEMU_THREAD_JOINABLE);
1488 #ifdef _WIN32
1489 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1490 #endif
1491 while (!cpu->created) {
1492 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1496 static void qemu_kvm_start_vcpu(CPUState *cpu)
1498 char thread_name[VCPU_THREAD_NAME_SIZE];
1500 cpu->thread = g_malloc0(sizeof(QemuThread));
1501 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1502 qemu_cond_init(cpu->halt_cond);
1503 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1504 cpu->cpu_index);
1505 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1506 cpu, QEMU_THREAD_JOINABLE);
1507 while (!cpu->created) {
1508 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1512 static void qemu_dummy_start_vcpu(CPUState *cpu)
1514 char thread_name[VCPU_THREAD_NAME_SIZE];
1516 cpu->thread = g_malloc0(sizeof(QemuThread));
1517 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1518 qemu_cond_init(cpu->halt_cond);
1519 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1520 cpu->cpu_index);
1521 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1522 QEMU_THREAD_JOINABLE);
1523 while (!cpu->created) {
1524 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1528 void qemu_init_vcpu(CPUState *cpu)
1530 cpu->nr_cores = smp_cores;
1531 cpu->nr_threads = smp_threads;
1532 cpu->stopped = true;
1534 if (!cpu->as) {
1535 /* If the target cpu hasn't set up any address spaces itself,
1536 * give it the default one.
1538 AddressSpace *as = address_space_init_shareable(cpu->memory,
1539 "cpu-memory");
1540 cpu->num_ases = 1;
1541 cpu_address_space_init(cpu, as, 0);
1544 if (kvm_enabled()) {
1545 qemu_kvm_start_vcpu(cpu);
1546 } else if (hax_enabled()) {
1547 qemu_hax_start_vcpu(cpu);
1548 } else if (tcg_enabled()) {
1549 qemu_tcg_init_vcpu(cpu);
1550 } else {
1551 qemu_dummy_start_vcpu(cpu);
1555 void cpu_stop_current(void)
1557 if (current_cpu) {
1558 current_cpu->stop = false;
1559 current_cpu->stopped = true;
1560 cpu_exit(current_cpu);
1561 qemu_cond_broadcast(&qemu_pause_cond);
1565 int vm_stop(RunState state)
1567 if (qemu_in_vcpu_thread()) {
1568 qemu_system_vmstop_request_prepare();
1569 qemu_system_vmstop_request(state);
1571 * FIXME: should not return to device code in case
1572 * vm_stop() has been requested.
1574 cpu_stop_current();
1575 return 0;
1578 return do_vm_stop(state);
1582 * Prepare for (re)starting the VM.
1583 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1584 * running or in case of an error condition), 0 otherwise.
1586 int vm_prepare_start(void)
1588 RunState requested;
1589 int res = 0;
1591 qemu_vmstop_requested(&requested);
1592 if (runstate_is_running() && requested == RUN_STATE__MAX) {
1593 return -1;
1596 /* Ensure that a STOP/RESUME pair of events is emitted if a
1597 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1598 * example, according to documentation is always followed by
1599 * the STOP event.
1601 if (runstate_is_running()) {
1602 qapi_event_send_stop(&error_abort);
1603 res = -1;
1604 } else {
1605 replay_enable_events();
1606 cpu_enable_ticks();
1607 runstate_set(RUN_STATE_RUNNING);
1608 vm_state_notify(1, RUN_STATE_RUNNING);
1611 /* We are sending this now, but the CPUs will be resumed shortly later */
1612 qapi_event_send_resume(&error_abort);
1613 return res;
/*
 * Start (or restart) the VM: resume all vCPUs when vm_prepare_start()
 * returns 0.
 */
void vm_start(void)
{
    if (vm_prepare_start() == 0) {
        resume_all_vcpus();
    }
}
1623 /* does a state transition even if the VM is already stopped,
1624 current state is forgotten forever */
1625 int vm_stop_force_state(RunState state)
1627 if (runstate_is_running()) {
1628 return vm_stop(state);
1629 } else {
1630 runstate_set(state);
1632 bdrv_drain_all();
1633 /* Make sure to return an error if the flush in a previous vm_stop()
1634 * failed. */
1635 return bdrv_flush_all();
1639 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1641 /* XXX: implement xxx_cpu_list for targets that still miss it */
1642 #if defined(cpu_list)
1643 cpu_list(f, cpu_fprintf);
1644 #endif
1647 CpuInfoList *qmp_query_cpus(Error **errp)
1649 CpuInfoList *head = NULL, *cur_item = NULL;
1650 CPUState *cpu;
1652 CPU_FOREACH(cpu) {
1653 CpuInfoList *info;
1654 #if defined(TARGET_I386)
1655 X86CPU *x86_cpu = X86_CPU(cpu);
1656 CPUX86State *env = &x86_cpu->env;
1657 #elif defined(TARGET_PPC)
1658 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1659 CPUPPCState *env = &ppc_cpu->env;
1660 #elif defined(TARGET_SPARC)
1661 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1662 CPUSPARCState *env = &sparc_cpu->env;
1663 #elif defined(TARGET_MIPS)
1664 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1665 CPUMIPSState *env = &mips_cpu->env;
1666 #elif defined(TARGET_TRICORE)
1667 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1668 CPUTriCoreState *env = &tricore_cpu->env;
1669 #endif
1671 cpu_synchronize_state(cpu);
1673 info = g_malloc0(sizeof(*info));
1674 info->value = g_malloc0(sizeof(*info->value));
1675 info->value->CPU = cpu->cpu_index;
1676 info->value->current = (cpu == first_cpu);
1677 info->value->halted = cpu->halted;
1678 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1679 info->value->thread_id = cpu->thread_id;
1680 #if defined(TARGET_I386)
1681 info->value->arch = CPU_INFO_ARCH_X86;
1682 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1683 #elif defined(TARGET_PPC)
1684 info->value->arch = CPU_INFO_ARCH_PPC;
1685 info->value->u.ppc.nip = env->nip;
1686 #elif defined(TARGET_SPARC)
1687 info->value->arch = CPU_INFO_ARCH_SPARC;
1688 info->value->u.q_sparc.pc = env->pc;
1689 info->value->u.q_sparc.npc = env->npc;
1690 #elif defined(TARGET_MIPS)
1691 info->value->arch = CPU_INFO_ARCH_MIPS;
1692 info->value->u.q_mips.PC = env->active_tc.PC;
1693 #elif defined(TARGET_TRICORE)
1694 info->value->arch = CPU_INFO_ARCH_TRICORE;
1695 info->value->u.tricore.PC = env->PC;
1696 #else
1697 info->value->arch = CPU_INFO_ARCH_OTHER;
1698 #endif
1700 /* XXX: waiting for the qapi to support GSList */
1701 if (!cur_item) {
1702 head = cur_item = info;
1703 } else {
1704 cur_item->next = info;
1705 cur_item = info;
1709 return head;
1712 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1713 bool has_cpu, int64_t cpu_index, Error **errp)
1715 FILE *f;
1716 uint32_t l;
1717 CPUState *cpu;
1718 uint8_t buf[1024];
1719 int64_t orig_addr = addr, orig_size = size;
1721 if (!has_cpu) {
1722 cpu_index = 0;
1725 cpu = qemu_get_cpu(cpu_index);
1726 if (cpu == NULL) {
1727 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1728 "a CPU number");
1729 return;
1732 f = fopen(filename, "wb");
1733 if (!f) {
1734 error_setg_file_open(errp, errno, filename);
1735 return;
1738 while (size != 0) {
1739 l = sizeof(buf);
1740 if (l > size)
1741 l = size;
1742 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1743 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1744 " specified", orig_addr, orig_size);
1745 goto exit;
1747 if (fwrite(buf, 1, l, f) != l) {
1748 error_setg(errp, QERR_IO_ERROR);
1749 goto exit;
1751 addr += l;
1752 size -= l;
1755 exit:
1756 fclose(f);
1759 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1760 Error **errp)
1762 FILE *f;
1763 uint32_t l;
1764 uint8_t buf[1024];
1766 f = fopen(filename, "wb");
1767 if (!f) {
1768 error_setg_file_open(errp, errno, filename);
1769 return;
1772 while (size != 0) {
1773 l = sizeof(buf);
1774 if (l > size)
1775 l = size;
1776 cpu_physical_memory_read(addr, buf, l);
1777 if (fwrite(buf, 1, l, f) != l) {
1778 error_setg(errp, QERR_IO_ERROR);
1779 goto exit;
1781 addr += l;
1782 size -= l;
1785 exit:
1786 fclose(f);
1789 void qmp_inject_nmi(Error **errp)
1791 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1794 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1796 if (!use_icount) {
1797 return;
1800 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1801 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1802 if (icount_align_option) {
1803 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1804 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1805 } else {
1806 cpu_fprintf(f, "Max guest delay NA\n");
1807 cpu_fprintf(f, "Max guest advance NA\n");