cpus: introduce cpu_update_icount helper
[qemu/kevin.git] / cpus.c
blob a5125d7167881ae84bae518ee68475de6e7ec344
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
29 #include "cpu.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "qmp-commands.h"
41 #include "exec/exec-all.h"
43 #include "qemu/thread.h"
44 #include "sysemu/cpus.h"
45 #include "sysemu/qtest.h"
46 #include "qemu/main-loop.h"
47 #include "qemu/bitmap.h"
48 #include "qemu/seqlock.h"
49 #include "tcg.h"
50 #include "qapi-event.h"
51 #include "hw/nmi.h"
52 #include "sysemu/replay.h"
54 #ifdef CONFIG_LINUX
56 #include <sys/prctl.h>
58 #ifndef PR_MCE_KILL
59 #define PR_MCE_KILL 33
60 #endif
62 #ifndef PR_MCE_KILL_SET
63 #define PR_MCE_KILL_SET 1
64 #endif
66 #ifndef PR_MCE_KILL_EARLY
67 #define PR_MCE_KILL_EARLY 1
68 #endif
70 #endif /* CONFIG_LINUX */
72 int64_t max_delay;
73 int64_t max_advance;
75 /* vcpu throttling controls */
76 static QEMUTimer *throttle_timer;
77 static unsigned int throttle_percentage;
79 #define CPU_THROTTLE_PCT_MIN 1
80 #define CPU_THROTTLE_PCT_MAX 99
81 #define CPU_THROTTLE_TIMESLICE_NS 10000000
83 bool cpu_is_stopped(CPUState *cpu)
85 return cpu->stopped || !runstate_is_running();
88 static bool cpu_thread_is_idle(CPUState *cpu)
90 if (cpu->stop || cpu->queued_work_first) {
91 return false;
93 if (cpu_is_stopped(cpu)) {
94 return true;
96 if (!cpu->halted || cpu_has_work(cpu) ||
97 kvm_halt_in_kernel()) {
98 return false;
100 return true;
103 static bool all_cpu_threads_idle(void)
105 CPUState *cpu;
107 CPU_FOREACH(cpu) {
108 if (!cpu_thread_is_idle(cpu)) {
109 return false;
112 return true;
115 /***********************************************************/
116 /* guest cycle counter */
118 /* Protected by TimersState seqlock */
120 static bool icount_sleep = true;
121 static int64_t vm_clock_warp_start = -1;
122 /* Conversion factor from emulated instructions to virtual clock ticks. */
123 static int icount_time_shift;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
125 #define MAX_ICOUNT_SHIFT 10
127 static QEMUTimer *icount_rt_timer;
128 static QEMUTimer *icount_vm_timer;
129 static QEMUTimer *icount_warp_timer;
131 typedef struct TimersState {
132 /* Protected by BQL. */
133 int64_t cpu_ticks_prev;
134 int64_t cpu_ticks_offset;
136 /* cpu_clock_offset can be read out of BQL, so protect it with
137 * this lock.
139 QemuSeqLock vm_clock_seqlock;
140 int64_t cpu_clock_offset;
141 int32_t cpu_ticks_enabled;
142 int64_t dummy;
144 /* Compensate for varying guest execution speed. */
145 int64_t qemu_icount_bias;
146 /* Only written by TCG thread */
147 int64_t qemu_icount;
148 } TimersState;
150 static TimersState timers_state;
151 bool mttcg_enabled;
154  * We default to false if we know other options have been enabled
155  * which are currently incompatible with MTTCG. Otherwise, once a
156  * guest (target) has been updated to support:
157  *  - atomic instructions
158  *  - memory ordering primitives (barriers)
159  * it can set the appropriate CONFIG flags in ${target}-softmmu.mak
161 * Once a guest architecture has been converted to the new primitives
162 * there are two remaining limitations to check.
164 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
165 * - The host must have a stronger memory order than the guest
167 * It may be possible in future to support strong guests on weak hosts
168 * but that will require tagging all load/stores in a guest with their
169 * implicit memory order requirements which would likely slow things
170 * down a lot.
173 static bool check_tcg_memory_orders_compatible(void)
175 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
176 return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
177 #else
178 return false;
179 #endif
182 static bool default_mttcg_enabled(void)
184 if (use_icount || TCG_OVERSIZED_GUEST) {
185 return false;
186 } else {
187 #ifdef TARGET_SUPPORTS_MTTCG
188 return check_tcg_memory_orders_compatible();
189 #else
190 return false;
191 #endif
195 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
197 const char *t = qemu_opt_get(opts, "thread");
198 if (t) {
199 if (strcmp(t, "multi") == 0) {
200 if (TCG_OVERSIZED_GUEST) {
201                 error_setg(errp, "No MTTCG when guest word size > host word size");
202 } else if (use_icount) {
203 error_setg(errp, "No MTTCG when icount is enabled");
204 } else {
205 #ifndef TARGET_SUPPORTS_MTTCG
206 error_report("Guest not yet converted to MTTCG - "
207 "you may get unexpected results");
208 #endif
209 if (!check_tcg_memory_orders_compatible()) {
210 error_report("Guest expects a stronger memory ordering "
211 "than the host provides");
212 error_printf("This may cause strange/hard to debug errors\n");
214 mttcg_enabled = true;
216 } else if (strcmp(t, "single") == 0) {
217 mttcg_enabled = false;
218 } else {
219 error_setg(errp, "Invalid 'thread' setting %s", t);
221 } else {
222 mttcg_enabled = default_mttcg_enabled();
226 /* The current number of executed instructions is based on what we
227 * originally budgeted minus the current state of the decrementing
228 * icount counters in extra/u16.low.
230 static int64_t cpu_get_icount_executed(CPUState *cpu)
232 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
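/* Illustrative numbers (not from the source): if a vCPU was budgeted
 * 100000 instructions, prepare_icount_for_run() below loads u16.low with
 * 0xffff (65535) and icount_extra with 34465.  After roughly 50000
 * instructions have run, u16.low has counted down to 15535, so
 * 100000 - (15535 + 34465) = 50000 executed instructions are reported. */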
236 * Update the global shared timer_state.qemu_icount to take into
237 * account executed instructions. This is done by the TCG vCPU
238 * thread so the main-loop can see time has moved forward.
240 void cpu_update_icount(CPUState *cpu)
242 int64_t executed = cpu_get_icount_executed(cpu);
243 cpu->icount_budget -= executed;
245 #ifdef CONFIG_ATOMIC64
246 atomic_set__nocheck(&timers_state.qemu_icount,
247 atomic_read__nocheck(&timers_state.qemu_icount) +
248 executed);
249 #else /* FIXME: we need 64bit atomics to do this safely */
250 timers_state.qemu_icount += executed;
251 #endif
254 int64_t cpu_get_icount_raw(void)
256 int64_t icount;
257 CPUState *cpu = current_cpu;
259 icount = atomic_read(&timers_state.qemu_icount);
260 if (cpu && cpu->running) {
261 if (!cpu->can_do_io) {
262 fprintf(stderr, "Bad icount read\n");
263 exit(1);
265 /* Take into account what has run */
266 icount += cpu_get_icount_executed(cpu);
268 return icount;
271 /* Return the virtual CPU time, based on the instruction counter. */
272 static int64_t cpu_get_icount_locked(void)
274 int64_t icount = cpu_get_icount_raw();
275 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
278 int64_t cpu_get_icount(void)
280 int64_t icount;
281 unsigned start;
283 do {
284 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
285 icount = cpu_get_icount_locked();
286 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
288 return icount;
291 int64_t cpu_icount_to_ns(int64_t icount)
293 return icount << icount_time_shift;
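/* For example, with the default auto setting used later in this file,
 * icount_time_shift = 3 maps one instruction to 1 << 3 = 8 ns of virtual
 * time, i.e. a nominal guest speed of about 125 MIPS. */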
296 /* return the time elapsed in VM between vm_start and vm_stop. Unless
297 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
298 * counter.
300 * Caller must hold the BQL
302 int64_t cpu_get_ticks(void)
304 int64_t ticks;
306 if (use_icount) {
307 return cpu_get_icount();
310 ticks = timers_state.cpu_ticks_offset;
311 if (timers_state.cpu_ticks_enabled) {
312 ticks += cpu_get_host_ticks();
315 if (timers_state.cpu_ticks_prev > ticks) {
316     /* Note: non-increasing ticks may happen if the host uses
317 software suspend */
318 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
319 ticks = timers_state.cpu_ticks_prev;
322 timers_state.cpu_ticks_prev = ticks;
323 return ticks;
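/* Worked example of the compensation above (illustrative values): if
 * cpu_ticks_prev was 1000 and the host counter jumps back so that ticks
 * computes to 900 (e.g. after a software suspend), cpu_ticks_offset is
 * increased by 100 and 1000 is returned again, keeping the value
 * monotonic. */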
326 static int64_t cpu_get_clock_locked(void)
328 int64_t time;
330 time = timers_state.cpu_clock_offset;
331 if (timers_state.cpu_ticks_enabled) {
332 time += get_clock();
335 return time;
338 /* Return the monotonic time elapsed in VM, i.e.,
339 * the time between vm_start and vm_stop
341 int64_t cpu_get_clock(void)
343 int64_t ti;
344 unsigned start;
346 do {
347 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
348 ti = cpu_get_clock_locked();
349 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
351 return ti;
354 /* enable cpu_get_ticks()
355 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
357 void cpu_enable_ticks(void)
359     /* Here, what is really protected by the seqlock is cpu_clock_offset. */
360 seqlock_write_begin(&timers_state.vm_clock_seqlock);
361 if (!timers_state.cpu_ticks_enabled) {
362 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
363 timers_state.cpu_clock_offset -= get_clock();
364 timers_state.cpu_ticks_enabled = 1;
366 seqlock_write_end(&timers_state.vm_clock_seqlock);
369 /* disable cpu_get_ticks() : the clock is stopped. You must not call
370 * cpu_get_ticks() after that.
371 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
373 void cpu_disable_ticks(void)
375     /* Here, what is really protected by the seqlock is cpu_clock_offset. */
376 seqlock_write_begin(&timers_state.vm_clock_seqlock);
377 if (timers_state.cpu_ticks_enabled) {
378 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
379 timers_state.cpu_clock_offset = cpu_get_clock_locked();
380 timers_state.cpu_ticks_enabled = 0;
382 seqlock_write_end(&timers_state.vm_clock_seqlock);
385 /* Correlation between real and virtual time is always going to be
386 fairly approximate, so ignore small variation.
387 When the guest is idle real and virtual time will be aligned in
388 the IO wait loop. */
389 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
391 static void icount_adjust(void)
393 int64_t cur_time;
394 int64_t cur_icount;
395 int64_t delta;
397 /* Protected by TimersState mutex. */
398 static int64_t last_delta;
400 /* If the VM is not running, then do nothing. */
401 if (!runstate_is_running()) {
402 return;
405 seqlock_write_begin(&timers_state.vm_clock_seqlock);
406 cur_time = cpu_get_clock_locked();
407 cur_icount = cpu_get_icount_locked();
409 delta = cur_icount - cur_time;
410 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
411 if (delta > 0
412 && last_delta + ICOUNT_WOBBLE < delta * 2
413 && icount_time_shift > 0) {
414 /* The guest is getting too far ahead. Slow time down. */
415 icount_time_shift--;
417 if (delta < 0
418 && last_delta - ICOUNT_WOBBLE > delta * 2
419 && icount_time_shift < MAX_ICOUNT_SHIFT) {
420 /* The guest is getting too far behind. Speed time up. */
421 icount_time_shift++;
423 last_delta = delta;
424 timers_state.qemu_icount_bias = cur_icount
425 - (timers_state.qemu_icount << icount_time_shift);
426 seqlock_write_end(&timers_state.vm_clock_seqlock);
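/* In other words: if virtual time (cur_icount) has drifted ahead of real
 * time by more than the allowed wobble, the shift is decremented so each
 * instruction accounts for fewer nanoseconds; if it lags behind, the shift
 * is incremented (up to MAX_ICOUNT_SHIFT).  The bias is then recomputed so
 * the adjustment does not make the virtual clock jump. */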
429 static void icount_adjust_rt(void *opaque)
431 timer_mod(icount_rt_timer,
432 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
433 icount_adjust();
436 static void icount_adjust_vm(void *opaque)
438 timer_mod(icount_vm_timer,
439 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
440 NANOSECONDS_PER_SECOND / 10);
441 icount_adjust();
444 static int64_t qemu_icount_round(int64_t count)
446 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
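/* e.g. with icount_time_shift = 3 a 20 ns deadline rounds up to
 * (20 + 7) >> 3 = 3 instructions, so the vCPU overshoots the next
 * QEMU_CLOCK_VIRTUAL timer by less than one instruction's worth of
 * virtual time. */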
449 static void icount_warp_rt(void)
451 unsigned seq;
452 int64_t warp_start;
454 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
455 * changes from -1 to another value, so the race here is okay.
457 do {
458 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
459 warp_start = vm_clock_warp_start;
460 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
462 if (warp_start == -1) {
463 return;
466 seqlock_write_begin(&timers_state.vm_clock_seqlock);
467 if (runstate_is_running()) {
468 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
469 cpu_get_clock_locked());
470 int64_t warp_delta;
472 warp_delta = clock - vm_clock_warp_start;
473 if (use_icount == 2) {
475 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
476 * far ahead of real time.
478 int64_t cur_icount = cpu_get_icount_locked();
479 int64_t delta = clock - cur_icount;
480 warp_delta = MIN(warp_delta, delta);
482 timers_state.qemu_icount_bias += warp_delta;
484 vm_clock_warp_start = -1;
485 seqlock_write_end(&timers_state.vm_clock_seqlock);
487 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
488 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
492 static void icount_timer_cb(void *opaque)
494 /* No need for a checkpoint because the timer already synchronizes
495 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
497 icount_warp_rt();
500 void qtest_clock_warp(int64_t dest)
502 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
503 AioContext *aio_context;
504 assert(qtest_enabled());
505 aio_context = qemu_get_aio_context();
506 while (clock < dest) {
507 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
508 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
510 seqlock_write_begin(&timers_state.vm_clock_seqlock);
511 timers_state.qemu_icount_bias += warp;
512 seqlock_write_end(&timers_state.vm_clock_seqlock);
514 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
515 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
516 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
518 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
521 void qemu_start_warp_timer(void)
523 int64_t clock;
524 int64_t deadline;
526 if (!use_icount) {
527 return;
530 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
531 * do not fire, so computing the deadline does not make sense.
533 if (!runstate_is_running()) {
534 return;
537 /* warp clock deterministically in record/replay mode */
538 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
539 return;
542 if (!all_cpu_threads_idle()) {
543 return;
546 if (qtest_enabled()) {
547 /* When testing, qtest commands advance icount. */
548 return;
551 /* We want to use the earliest deadline from ALL vm_clocks */
552 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
553 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
554 if (deadline < 0) {
555 static bool notified;
556 if (!icount_sleep && !notified) {
557 error_report("WARNING: icount sleep disabled and no active timers");
558 notified = true;
560 return;
563 if (deadline > 0) {
565 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
566 * sleep. Otherwise, the CPU might be waiting for a future timer
567 * interrupt to wake it up, but the interrupt never comes because
568 * the vCPU isn't running any insns and thus doesn't advance the
569 * QEMU_CLOCK_VIRTUAL.
571 if (!icount_sleep) {
573              * We never let VCPUs sleep in no-sleep icount mode.
574 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
575 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
576 * It is useful when we want a deterministic execution time,
577 * isolated from host latencies.
579 seqlock_write_begin(&timers_state.vm_clock_seqlock);
580 timers_state.qemu_icount_bias += deadline;
581 seqlock_write_end(&timers_state.vm_clock_seqlock);
582 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
583 } else {
585 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
586 * "real" time, (related to the time left until the next event) has
587 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
588              * This prevents the warps from being visible externally; for example,
589 * you will not be sending network packets continuously instead of
590 * every 100ms.
592 seqlock_write_begin(&timers_state.vm_clock_seqlock);
593 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
594 vm_clock_warp_start = clock;
596 seqlock_write_end(&timers_state.vm_clock_seqlock);
597 timer_mod_anticipate(icount_warp_timer, clock + deadline);
599 } else if (deadline == 0) {
600 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
604 static void qemu_account_warp_timer(void)
606 if (!use_icount || !icount_sleep) {
607 return;
610 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
611 * do not fire, so computing the deadline does not make sense.
613 if (!runstate_is_running()) {
614 return;
617 /* warp clock deterministically in record/replay mode */
618 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
619 return;
622 timer_del(icount_warp_timer);
623 icount_warp_rt();
626 static bool icount_state_needed(void *opaque)
628 return use_icount;
632 * This is a subsection for icount migration.
634 static const VMStateDescription icount_vmstate_timers = {
635 .name = "timer/icount",
636 .version_id = 1,
637 .minimum_version_id = 1,
638 .needed = icount_state_needed,
639 .fields = (VMStateField[]) {
640 VMSTATE_INT64(qemu_icount_bias, TimersState),
641 VMSTATE_INT64(qemu_icount, TimersState),
642 VMSTATE_END_OF_LIST()
646 static const VMStateDescription vmstate_timers = {
647 .name = "timer",
648 .version_id = 2,
649 .minimum_version_id = 1,
650 .fields = (VMStateField[]) {
651 VMSTATE_INT64(cpu_ticks_offset, TimersState),
652 VMSTATE_INT64(dummy, TimersState),
653 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
654 VMSTATE_END_OF_LIST()
656 .subsections = (const VMStateDescription*[]) {
657 &icount_vmstate_timers,
658 NULL
662 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
664 double pct;
665 double throttle_ratio;
666 long sleeptime_ns;
668 if (!cpu_throttle_get_percentage()) {
669 return;
672 pct = (double)cpu_throttle_get_percentage()/100;
673 throttle_ratio = pct / (1 - pct);
674 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
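    /* Illustrative arithmetic: at a 50% throttle pct = 0.5, so
     * throttle_ratio = 1 and the vCPU sleeps for one 10 ms timeslice for
     * every 10 ms of run time; at 99% it sleeps roughly 990 ms per
     * timeslice. */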
676 qemu_mutex_unlock_iothread();
677 atomic_set(&cpu->throttle_thread_scheduled, 0);
678 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
679 qemu_mutex_lock_iothread();
682 static void cpu_throttle_timer_tick(void *opaque)
684 CPUState *cpu;
685 double pct;
687 /* Stop the timer if needed */
688 if (!cpu_throttle_get_percentage()) {
689 return;
691 CPU_FOREACH(cpu) {
692 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
693 async_run_on_cpu(cpu, cpu_throttle_thread,
694 RUN_ON_CPU_NULL);
698 pct = (double)cpu_throttle_get_percentage()/100;
699 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
700 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
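    /* The timer period grows with the throttle so that run time plus the
     * sleep injected above stay balanced: e.g. at 50% the tick fires every
     * 10 ms / 0.5 = 20 ms, matching the 10 ms of sleep per timeslice. */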
703 void cpu_throttle_set(int new_throttle_pct)
705 /* Ensure throttle percentage is within valid range */
706 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
707 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
709 atomic_set(&throttle_percentage, new_throttle_pct);
711 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
712 CPU_THROTTLE_TIMESLICE_NS);
715 void cpu_throttle_stop(void)
717 atomic_set(&throttle_percentage, 0);
720 bool cpu_throttle_active(void)
722 return (cpu_throttle_get_percentage() != 0);
725 int cpu_throttle_get_percentage(void)
727 return atomic_read(&throttle_percentage);
730 void cpu_ticks_init(void)
732 seqlock_init(&timers_state.vm_clock_seqlock);
733 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
734 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
735 cpu_throttle_timer_tick, NULL);
738 void configure_icount(QemuOpts *opts, Error **errp)
740 const char *option;
741 char *rem_str = NULL;
743 option = qemu_opt_get(opts, "shift");
744 if (!option) {
745 if (qemu_opt_get(opts, "align") != NULL) {
746 error_setg(errp, "Please specify shift option when using align");
748 return;
751 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
752 if (icount_sleep) {
753 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
754 icount_timer_cb, NULL);
757 icount_align_option = qemu_opt_get_bool(opts, "align", false);
759 if (icount_align_option && !icount_sleep) {
760 error_setg(errp, "align=on and sleep=off are incompatible");
762 if (strcmp(option, "auto") != 0) {
763 errno = 0;
764 icount_time_shift = strtol(option, &rem_str, 0);
765 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
766 error_setg(errp, "icount: Invalid shift value");
768 use_icount = 1;
769 return;
770 } else if (icount_align_option) {
771 error_setg(errp, "shift=auto and align=on are incompatible");
772 } else if (!icount_sleep) {
773 error_setg(errp, "shift=auto and sleep=off are incompatible");
776 use_icount = 2;
778 /* 125MIPS seems a reasonable initial guess at the guest speed.
779 It will be corrected fairly quickly anyway. */
780 icount_time_shift = 3;
782 /* Have both realtime and virtual time triggers for speed adjustment.
783 The realtime trigger catches emulated time passing too slowly,
784 the virtual time trigger catches emulated time passing too fast.
785 Realtime triggers occur even when idle, so use them less frequently
786 than VM triggers. */
787 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
788 icount_adjust_rt, NULL);
789 timer_mod(icount_rt_timer,
790 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
791 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
792 icount_adjust_vm, NULL);
793 timer_mod(icount_vm_timer,
794 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
795 NANOSECONDS_PER_SECOND / 10);
798 /***********************************************************/
799 /* TCG vCPU kick timer
801 * The kick timer is responsible for moving single threaded vCPU
802 * emulation on to the next vCPU. If more than one vCPU is running a
803  * timer event will force a cpu->exit so the next vCPU can get
804 * scheduled.
806  * The timer is removed while all vCPUs are idle and restarted again
807  * once a vCPU becomes runnable.
810 static QEMUTimer *tcg_kick_vcpu_timer;
811 static CPUState *tcg_current_rr_cpu;
813 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
815 static inline int64_t qemu_tcg_next_kick(void)
817 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
820 /* Kick the currently round-robin scheduled vCPU */
821 static void qemu_cpu_kick_rr_cpu(void)
823 CPUState *cpu;
824 do {
825 cpu = atomic_mb_read(&tcg_current_rr_cpu);
826 if (cpu) {
827 cpu_exit(cpu);
829 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
832 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
836 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
838 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
839 qemu_notify_event();
840 return;
843 if (!qemu_in_vcpu_thread() && first_cpu) {
844 /* qemu_cpu_kick is not enough to kick a halted CPU out of
845 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
846 * causes cpu_thread_is_idle to return false. This way,
847 * handle_icount_deadline can run.
849 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
853 static void kick_tcg_thread(void *opaque)
855 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
856 qemu_cpu_kick_rr_cpu();
859 static void start_tcg_kick_timer(void)
861 if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
862 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
863 kick_tcg_thread, NULL);
864 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
868 static void stop_tcg_kick_timer(void)
870 if (tcg_kick_vcpu_timer) {
871 timer_del(tcg_kick_vcpu_timer);
872 tcg_kick_vcpu_timer = NULL;
876 /***********************************************************/
877 void hw_error(const char *fmt, ...)
879 va_list ap;
880 CPUState *cpu;
882 va_start(ap, fmt);
883 fprintf(stderr, "qemu: hardware error: ");
884 vfprintf(stderr, fmt, ap);
885 fprintf(stderr, "\n");
886 CPU_FOREACH(cpu) {
887 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
888 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
890 va_end(ap);
891 abort();
894 void cpu_synchronize_all_states(void)
896 CPUState *cpu;
898 CPU_FOREACH(cpu) {
899 cpu_synchronize_state(cpu);
903 void cpu_synchronize_all_post_reset(void)
905 CPUState *cpu;
907 CPU_FOREACH(cpu) {
908 cpu_synchronize_post_reset(cpu);
912 void cpu_synchronize_all_post_init(void)
914 CPUState *cpu;
916 CPU_FOREACH(cpu) {
917 cpu_synchronize_post_init(cpu);
921 static int do_vm_stop(RunState state)
923 int ret = 0;
925 if (runstate_is_running()) {
926 cpu_disable_ticks();
927 pause_all_vcpus();
928 runstate_set(state);
929 vm_state_notify(0, state);
930 qapi_event_send_stop(&error_abort);
933 bdrv_drain_all();
934 replay_disable_events();
935 ret = bdrv_flush_all();
937 return ret;
940 static bool cpu_can_run(CPUState *cpu)
942 if (cpu->stop) {
943 return false;
945 if (cpu_is_stopped(cpu)) {
946 return false;
948 return true;
951 static void cpu_handle_guest_debug(CPUState *cpu)
953 gdb_set_stop_cpu(cpu);
954 qemu_system_debug_request();
955 cpu->stopped = true;
958 #ifdef CONFIG_LINUX
959 static void sigbus_reraise(void)
961 sigset_t set;
962 struct sigaction action;
964 memset(&action, 0, sizeof(action));
965 action.sa_handler = SIG_DFL;
966 if (!sigaction(SIGBUS, &action, NULL)) {
967 raise(SIGBUS);
968 sigemptyset(&set);
969 sigaddset(&set, SIGBUS);
970 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
972 perror("Failed to re-raise SIGBUS!\n");
973 abort();
976 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
978 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
979 sigbus_reraise();
982 if (current_cpu) {
983 /* Called asynchronously in VCPU thread. */
984 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
985 sigbus_reraise();
987 } else {
988 /* Called synchronously (via signalfd) in main thread. */
989 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
990 sigbus_reraise();
995 static void qemu_init_sigbus(void)
997 struct sigaction action;
999 memset(&action, 0, sizeof(action));
1000 action.sa_flags = SA_SIGINFO;
1001 action.sa_sigaction = sigbus_handler;
1002 sigaction(SIGBUS, &action, NULL);
1004 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1006 #else /* !CONFIG_LINUX */
1007 static void qemu_init_sigbus(void)
1010 #endif /* !CONFIG_LINUX */
1012 static QemuMutex qemu_global_mutex;
1014 static QemuThread io_thread;
1016 /* cpu creation */
1017 static QemuCond qemu_cpu_cond;
1018 /* system init */
1019 static QemuCond qemu_pause_cond;
1021 void qemu_init_cpu_loop(void)
1023 qemu_init_sigbus();
1024 qemu_cond_init(&qemu_cpu_cond);
1025 qemu_cond_init(&qemu_pause_cond);
1026 qemu_mutex_init(&qemu_global_mutex);
1028 qemu_thread_get_self(&io_thread);
1031 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1033 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1036 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1038 if (kvm_destroy_vcpu(cpu) < 0) {
1039 error_report("kvm_destroy_vcpu failed");
1040 exit(EXIT_FAILURE);
1044 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1048 static void qemu_wait_io_event_common(CPUState *cpu)
1050 atomic_mb_set(&cpu->thread_kicked, false);
1051 if (cpu->stop) {
1052 cpu->stop = false;
1053 cpu->stopped = true;
1054 qemu_cond_broadcast(&qemu_pause_cond);
1056 process_queued_cpu_work(cpu);
1059 static bool qemu_tcg_should_sleep(CPUState *cpu)
1061 if (mttcg_enabled) {
1062 return cpu_thread_is_idle(cpu);
1063 } else {
1064 return all_cpu_threads_idle();
1068 static void qemu_tcg_wait_io_event(CPUState *cpu)
1070 while (qemu_tcg_should_sleep(cpu)) {
1071 stop_tcg_kick_timer();
1072 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1075 start_tcg_kick_timer();
1077 qemu_wait_io_event_common(cpu);
1080 static void qemu_kvm_wait_io_event(CPUState *cpu)
1082 while (cpu_thread_is_idle(cpu)) {
1083 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1086 qemu_wait_io_event_common(cpu);
1089 static void *qemu_kvm_cpu_thread_fn(void *arg)
1091 CPUState *cpu = arg;
1092 int r;
1094 rcu_register_thread();
1096 qemu_mutex_lock_iothread();
1097 qemu_thread_get_self(cpu->thread);
1098 cpu->thread_id = qemu_get_thread_id();
1099 cpu->can_do_io = 1;
1100 current_cpu = cpu;
1102 r = kvm_init_vcpu(cpu);
1103 if (r < 0) {
1104 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1105 exit(1);
1108 kvm_init_cpu_signals(cpu);
1110 /* signal CPU creation */
1111 cpu->created = true;
1112 qemu_cond_signal(&qemu_cpu_cond);
1114 do {
1115 if (cpu_can_run(cpu)) {
1116 r = kvm_cpu_exec(cpu);
1117 if (r == EXCP_DEBUG) {
1118 cpu_handle_guest_debug(cpu);
1121 qemu_kvm_wait_io_event(cpu);
1122 } while (!cpu->unplug || cpu_can_run(cpu));
1124 qemu_kvm_destroy_vcpu(cpu);
1125 cpu->created = false;
1126 qemu_cond_signal(&qemu_cpu_cond);
1127 qemu_mutex_unlock_iothread();
1128 return NULL;
1131 static void *qemu_dummy_cpu_thread_fn(void *arg)
1133 #ifdef _WIN32
1134 fprintf(stderr, "qtest is not supported under Windows\n");
1135 exit(1);
1136 #else
1137 CPUState *cpu = arg;
1138 sigset_t waitset;
1139 int r;
1141 rcu_register_thread();
1143 qemu_mutex_lock_iothread();
1144 qemu_thread_get_self(cpu->thread);
1145 cpu->thread_id = qemu_get_thread_id();
1146 cpu->can_do_io = 1;
1147 current_cpu = cpu;
1149 sigemptyset(&waitset);
1150 sigaddset(&waitset, SIG_IPI);
1152 /* signal CPU creation */
1153 cpu->created = true;
1154 qemu_cond_signal(&qemu_cpu_cond);
1156 while (1) {
1157 qemu_mutex_unlock_iothread();
1158 do {
1159 int sig;
1160 r = sigwait(&waitset, &sig);
1161 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1162 if (r == -1) {
1163 perror("sigwait");
1164 exit(1);
1166 qemu_mutex_lock_iothread();
1167 qemu_wait_io_event_common(cpu);
1170 return NULL;
1171 #endif
1174 static int64_t tcg_get_icount_limit(void)
1176 int64_t deadline;
1178 if (replay_mode != REPLAY_MODE_PLAY) {
1179 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1181 /* Maintain prior (possibly buggy) behaviour where if no deadline
1182 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1183 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1184 * nanoseconds.
1186 if ((deadline < 0) || (deadline > INT32_MAX)) {
1187 deadline = INT32_MAX;
1190 return qemu_icount_round(deadline);
1191 } else {
1192 return replay_get_instructions();
1196 static void handle_icount_deadline(void)
1198 assert(qemu_in_vcpu_thread());
1199 if (use_icount) {
1200 int64_t deadline =
1201 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1203 if (deadline == 0) {
1204 /* Wake up other AioContexts. */
1205 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1206 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1211 static void prepare_icount_for_run(CPUState *cpu)
1213 if (use_icount) {
1214 int64_t count;
1215 int decr;
1217 /* These should always be cleared by process_icount_data after
1218          * each vCPU execution. However, u16.high can be raised
1219          * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt.
1221 g_assert(cpu->icount_decr.u16.low == 0);
1222 g_assert(cpu->icount_extra == 0);
1225 count = tcg_get_icount_limit();
1227 /* To calculate what we have executed so far we need to know
1228 * what we originally budgeted to run this cycle */
1229 cpu->icount_budget = count;
1231 decr = (count > 0xffff) ? 0xffff : count;
1232 count -= decr;
1233 cpu->icount_decr.u16.low = decr;
1234 cpu->icount_extra = count;
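        /* Example split (illustrative values): with a budget of 100000
         * instructions the 16-bit decrementer gets 0xffff (65535) and
         * icount_extra holds the remaining 34465; a budget of 500 fits
         * entirely in u16.low with icount_extra = 0. */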
1238 static void process_icount_data(CPUState *cpu)
1240 if (use_icount) {
1241 /* Account for executed instructions */
1242 cpu_update_icount(cpu);
1244 /* Reset the counters */
1245 cpu->icount_decr.u16.low = 0;
1246 cpu->icount_extra = 0;
1247 cpu->icount_budget = 0;
1249 replay_account_executed_instructions();
1254 static int tcg_cpu_exec(CPUState *cpu)
1256 int ret;
1257 #ifdef CONFIG_PROFILER
1258 int64_t ti;
1259 #endif
1261 #ifdef CONFIG_PROFILER
1262 ti = profile_getclock();
1263 #endif
1264 qemu_mutex_unlock_iothread();
1265 cpu_exec_start(cpu);
1266 ret = cpu_exec(cpu);
1267 cpu_exec_end(cpu);
1268 qemu_mutex_lock_iothread();
1269 #ifdef CONFIG_PROFILER
1270 tcg_time += profile_getclock() - ti;
1271 #endif
1272 return ret;
1275 /* Destroy any remaining vCPUs which have been unplugged and have
1276 * finished running
1278 static void deal_with_unplugged_cpus(void)
1280 CPUState *cpu;
1282 CPU_FOREACH(cpu) {
1283 if (cpu->unplug && !cpu_can_run(cpu)) {
1284 qemu_tcg_destroy_vcpu(cpu);
1285 cpu->created = false;
1286 qemu_cond_signal(&qemu_cpu_cond);
1287 break;
1292 /* Single-threaded TCG
1294 * In the single-threaded case each vCPU is simulated in turn. If
1295 * there is more than a single vCPU we create a simple timer to kick
1296 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1297 * This is done explicitly rather than relying on side-effects
1298 * elsewhere.
1301 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1303 CPUState *cpu = arg;
1305 rcu_register_thread();
1307 qemu_mutex_lock_iothread();
1308 qemu_thread_get_self(cpu->thread);
1310 CPU_FOREACH(cpu) {
1311 cpu->thread_id = qemu_get_thread_id();
1312 cpu->created = true;
1313 cpu->can_do_io = 1;
1315 qemu_cond_signal(&qemu_cpu_cond);
1317 /* wait for initial kick-off after machine start */
1318 while (first_cpu->stopped) {
1319 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1321 /* process any pending work */
1322 CPU_FOREACH(cpu) {
1323 current_cpu = cpu;
1324 qemu_wait_io_event_common(cpu);
1328 start_tcg_kick_timer();
1330 cpu = first_cpu;
1332 /* process any pending work */
1333 cpu->exit_request = 1;
1335 while (1) {
1336 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1337 qemu_account_warp_timer();
1339 /* Run the timers here. This is much more efficient than
1340 * waking up the I/O thread and waiting for completion.
1342 handle_icount_deadline();
1344 if (!cpu) {
1345 cpu = first_cpu;
1348 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1350 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1351 current_cpu = cpu;
1353 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1354 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1356 if (cpu_can_run(cpu)) {
1357 int r;
1359 prepare_icount_for_run(cpu);
1361 r = tcg_cpu_exec(cpu);
1363 process_icount_data(cpu);
1365 if (r == EXCP_DEBUG) {
1366 cpu_handle_guest_debug(cpu);
1367 break;
1368 } else if (r == EXCP_ATOMIC) {
1369 qemu_mutex_unlock_iothread();
1370 cpu_exec_step_atomic(cpu);
1371 qemu_mutex_lock_iothread();
1372 break;
1374 } else if (cpu->stop) {
1375 if (cpu->unplug) {
1376 cpu = CPU_NEXT(cpu);
1378 break;
1381 cpu = CPU_NEXT(cpu);
1382 } /* while (cpu && !cpu->exit_request).. */
1384 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1385 atomic_set(&tcg_current_rr_cpu, NULL);
1387 if (cpu && cpu->exit_request) {
1388 atomic_mb_set(&cpu->exit_request, 0);
1391 qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
1392 deal_with_unplugged_cpus();
1395 return NULL;
1398 static void *qemu_hax_cpu_thread_fn(void *arg)
1400 CPUState *cpu = arg;
1401 int r;
1403 qemu_mutex_lock_iothread();
1404 qemu_thread_get_self(cpu->thread);
1406 cpu->thread_id = qemu_get_thread_id();
1407 cpu->created = true;
1408 cpu->halted = 0;
1409 current_cpu = cpu;
1411 hax_init_vcpu(cpu);
1412 qemu_cond_signal(&qemu_cpu_cond);
1414 while (1) {
1415 if (cpu_can_run(cpu)) {
1416 r = hax_smp_cpu_exec(cpu);
1417 if (r == EXCP_DEBUG) {
1418 cpu_handle_guest_debug(cpu);
1422 while (cpu_thread_is_idle(cpu)) {
1423 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1425 #ifdef _WIN32
1426 SleepEx(0, TRUE);
1427 #endif
1428 qemu_wait_io_event_common(cpu);
1430 return NULL;
1433 #ifdef _WIN32
1434 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1437 #endif
1439 /* Multi-threaded TCG
1441 * In the multi-threaded case each vCPU has its own thread. The TLS
1442 * variable current_cpu can be used deep in the code to find the
1443 * current CPUState for a given thread.
1446 static void *qemu_tcg_cpu_thread_fn(void *arg)
1448 CPUState *cpu = arg;
1450 g_assert(!use_icount);
1452 rcu_register_thread();
1454 qemu_mutex_lock_iothread();
1455 qemu_thread_get_self(cpu->thread);
1457 cpu->thread_id = qemu_get_thread_id();
1458 cpu->created = true;
1459 cpu->can_do_io = 1;
1460 current_cpu = cpu;
1461 qemu_cond_signal(&qemu_cpu_cond);
1463 /* process any pending work */
1464 cpu->exit_request = 1;
1466 while (1) {
1467 if (cpu_can_run(cpu)) {
1468 int r;
1469 r = tcg_cpu_exec(cpu);
1470 switch (r) {
1471 case EXCP_DEBUG:
1472 cpu_handle_guest_debug(cpu);
1473 break;
1474 case EXCP_HALTED:
1475 /* during start-up the vCPU is reset and the thread is
1476 * kicked several times. If we don't ensure we go back
1477 * to sleep in the halted state we won't cleanly
1478 * start-up when the vCPU is enabled.
1480 * cpu->halted should ensure we sleep in wait_io_event
1482 g_assert(cpu->halted);
1483 break;
1484 case EXCP_ATOMIC:
1485 qemu_mutex_unlock_iothread();
1486 cpu_exec_step_atomic(cpu);
1487 qemu_mutex_lock_iothread();
1488 default:
1489 /* Ignore everything else? */
1490 break;
1494 atomic_mb_set(&cpu->exit_request, 0);
1495 qemu_tcg_wait_io_event(cpu);
1498 return NULL;
1501 static void qemu_cpu_kick_thread(CPUState *cpu)
1503 #ifndef _WIN32
1504 int err;
1506 if (cpu->thread_kicked) {
1507 return;
1509 cpu->thread_kicked = true;
1510 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1511 if (err) {
1512 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1513 exit(1);
1515 #else /* _WIN32 */
1516 if (!qemu_cpu_is_self(cpu)) {
1517 if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1518 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1519 __func__, GetLastError());
1520 exit(1);
1523 #endif
1526 void qemu_cpu_kick(CPUState *cpu)
1528 qemu_cond_broadcast(cpu->halt_cond);
1529 if (tcg_enabled()) {
1530 cpu_exit(cpu);
1531 /* NOP unless doing single-thread RR */
1532 qemu_cpu_kick_rr_cpu();
1533 } else {
1534 if (hax_enabled()) {
1536 * FIXME: race condition with the exit_request check in
1537 * hax_vcpu_hax_exec
1539 cpu->exit_request = 1;
1541 qemu_cpu_kick_thread(cpu);
1545 void qemu_cpu_kick_self(void)
1547 assert(current_cpu);
1548 qemu_cpu_kick_thread(current_cpu);
1551 bool qemu_cpu_is_self(CPUState *cpu)
1553 return qemu_thread_is_self(cpu->thread);
1556 bool qemu_in_vcpu_thread(void)
1558 return current_cpu && qemu_cpu_is_self(current_cpu);
1561 static __thread bool iothread_locked = false;
1563 bool qemu_mutex_iothread_locked(void)
1565 return iothread_locked;
1568 void qemu_mutex_lock_iothread(void)
1570 g_assert(!qemu_mutex_iothread_locked());
1571 qemu_mutex_lock(&qemu_global_mutex);
1572 iothread_locked = true;
1575 void qemu_mutex_unlock_iothread(void)
1577 g_assert(qemu_mutex_iothread_locked());
1578 iothread_locked = false;
1579 qemu_mutex_unlock(&qemu_global_mutex);
1582 static bool all_vcpus_paused(void)
1584 CPUState *cpu;
1586 CPU_FOREACH(cpu) {
1587 if (!cpu->stopped) {
1588 return false;
1592 return true;
1595 void pause_all_vcpus(void)
1597 CPUState *cpu;
1599 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1600 CPU_FOREACH(cpu) {
1601 cpu->stop = true;
1602 qemu_cpu_kick(cpu);
1605 if (qemu_in_vcpu_thread()) {
1606 cpu_stop_current();
1609 while (!all_vcpus_paused()) {
1610 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1611 CPU_FOREACH(cpu) {
1612 qemu_cpu_kick(cpu);
1617 void cpu_resume(CPUState *cpu)
1619 cpu->stop = false;
1620 cpu->stopped = false;
1621 qemu_cpu_kick(cpu);
1624 void resume_all_vcpus(void)
1626 CPUState *cpu;
1628 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1629 CPU_FOREACH(cpu) {
1630 cpu_resume(cpu);
1634 void cpu_remove(CPUState *cpu)
1636 cpu->stop = true;
1637 cpu->unplug = true;
1638 qemu_cpu_kick(cpu);
1641 void cpu_remove_sync(CPUState *cpu)
1643 cpu_remove(cpu);
1644 while (cpu->created) {
1645 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1649 /* Size of temporary buffers used for forming a thread name */
1650 #define VCPU_THREAD_NAME_SIZE 16
1652 static void qemu_tcg_init_vcpu(CPUState *cpu)
1654 char thread_name[VCPU_THREAD_NAME_SIZE];
1655 static QemuCond *single_tcg_halt_cond;
1656 static QemuThread *single_tcg_cpu_thread;
1658 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1659 cpu->thread = g_malloc0(sizeof(QemuThread));
1660 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1661 qemu_cond_init(cpu->halt_cond);
1663 if (qemu_tcg_mttcg_enabled()) {
1664 /* create a thread per vCPU with TCG (MTTCG) */
1665 parallel_cpus = true;
1666 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1667 cpu->cpu_index);
1669 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1670 cpu, QEMU_THREAD_JOINABLE);
1672 } else {
1673 /* share a single thread for all cpus with TCG */
1674 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1675 qemu_thread_create(cpu->thread, thread_name,
1676 qemu_tcg_rr_cpu_thread_fn,
1677 cpu, QEMU_THREAD_JOINABLE);
1679 single_tcg_halt_cond = cpu->halt_cond;
1680 single_tcg_cpu_thread = cpu->thread;
1682 #ifdef _WIN32
1683 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1684 #endif
1685 while (!cpu->created) {
1686 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1688 } else {
1689 /* For non-MTTCG cases we share the thread */
1690 cpu->thread = single_tcg_cpu_thread;
1691 cpu->halt_cond = single_tcg_halt_cond;
1695 static void qemu_hax_start_vcpu(CPUState *cpu)
1697 char thread_name[VCPU_THREAD_NAME_SIZE];
1699 cpu->thread = g_malloc0(sizeof(QemuThread));
1700 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1701 qemu_cond_init(cpu->halt_cond);
1703 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1704 cpu->cpu_index);
1705 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1706 cpu, QEMU_THREAD_JOINABLE);
1707 #ifdef _WIN32
1708 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1709 #endif
1710 while (!cpu->created) {
1711 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1715 static void qemu_kvm_start_vcpu(CPUState *cpu)
1717 char thread_name[VCPU_THREAD_NAME_SIZE];
1719 cpu->thread = g_malloc0(sizeof(QemuThread));
1720 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1721 qemu_cond_init(cpu->halt_cond);
1722 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1723 cpu->cpu_index);
1724 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1725 cpu, QEMU_THREAD_JOINABLE);
1726 while (!cpu->created) {
1727 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1731 static void qemu_dummy_start_vcpu(CPUState *cpu)
1733 char thread_name[VCPU_THREAD_NAME_SIZE];
1735 cpu->thread = g_malloc0(sizeof(QemuThread));
1736 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1737 qemu_cond_init(cpu->halt_cond);
1738 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1739 cpu->cpu_index);
1740 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1741 QEMU_THREAD_JOINABLE);
1742 while (!cpu->created) {
1743 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1747 void qemu_init_vcpu(CPUState *cpu)
1749 cpu->nr_cores = smp_cores;
1750 cpu->nr_threads = smp_threads;
1751 cpu->stopped = true;
1753 if (!cpu->as) {
1754 /* If the target cpu hasn't set up any address spaces itself,
1755 * give it the default one.
1757 AddressSpace *as = address_space_init_shareable(cpu->memory,
1758 "cpu-memory");
1759 cpu->num_ases = 1;
1760 cpu_address_space_init(cpu, as, 0);
1763 if (kvm_enabled()) {
1764 qemu_kvm_start_vcpu(cpu);
1765 } else if (hax_enabled()) {
1766 qemu_hax_start_vcpu(cpu);
1767 } else if (tcg_enabled()) {
1768 qemu_tcg_init_vcpu(cpu);
1769 } else {
1770 qemu_dummy_start_vcpu(cpu);
1774 void cpu_stop_current(void)
1776 if (current_cpu) {
1777 current_cpu->stop = false;
1778 current_cpu->stopped = true;
1779 cpu_exit(current_cpu);
1780 qemu_cond_broadcast(&qemu_pause_cond);
1784 int vm_stop(RunState state)
1786 if (qemu_in_vcpu_thread()) {
1787 qemu_system_vmstop_request_prepare();
1788 qemu_system_vmstop_request(state);
1790 * FIXME: should not return to device code in case
1791 * vm_stop() has been requested.
1793 cpu_stop_current();
1794 return 0;
1797 return do_vm_stop(state);
1801 * Prepare for (re)starting the VM.
1802 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1803 * running or in case of an error condition), 0 otherwise.
1805 int vm_prepare_start(void)
1807 RunState requested;
1808 int res = 0;
1810 qemu_vmstop_requested(&requested);
1811 if (runstate_is_running() && requested == RUN_STATE__MAX) {
1812 return -1;
1815 /* Ensure that a STOP/RESUME pair of events is emitted if a
1816 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1817      * example, is documented as always being followed by
1818      * the STOP event.
1820 if (runstate_is_running()) {
1821 qapi_event_send_stop(&error_abort);
1822 res = -1;
1823 } else {
1824 replay_enable_events();
1825 cpu_enable_ticks();
1826 runstate_set(RUN_STATE_RUNNING);
1827 vm_state_notify(1, RUN_STATE_RUNNING);
1830     /* We are sending this now, but the CPUs will be resumed shortly afterwards */
1831 qapi_event_send_resume(&error_abort);
1832 return res;
1835 void vm_start(void)
1837 if (!vm_prepare_start()) {
1838 resume_all_vcpus();
1842 /* Does a state transition even if the VM is already stopped;
1843    the current state is forgotten forever. */
1844 int vm_stop_force_state(RunState state)
1846 if (runstate_is_running()) {
1847 return vm_stop(state);
1848 } else {
1849 runstate_set(state);
1851 bdrv_drain_all();
1852 /* Make sure to return an error if the flush in a previous vm_stop()
1853 * failed. */
1854 return bdrv_flush_all();
1858 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1860 /* XXX: implement xxx_cpu_list for targets that still miss it */
1861 #if defined(cpu_list)
1862 cpu_list(f, cpu_fprintf);
1863 #endif
1866 CpuInfoList *qmp_query_cpus(Error **errp)
1868 CpuInfoList *head = NULL, *cur_item = NULL;
1869 CPUState *cpu;
1871 CPU_FOREACH(cpu) {
1872 CpuInfoList *info;
1873 #if defined(TARGET_I386)
1874 X86CPU *x86_cpu = X86_CPU(cpu);
1875 CPUX86State *env = &x86_cpu->env;
1876 #elif defined(TARGET_PPC)
1877 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1878 CPUPPCState *env = &ppc_cpu->env;
1879 #elif defined(TARGET_SPARC)
1880 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1881 CPUSPARCState *env = &sparc_cpu->env;
1882 #elif defined(TARGET_MIPS)
1883 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1884 CPUMIPSState *env = &mips_cpu->env;
1885 #elif defined(TARGET_TRICORE)
1886 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1887 CPUTriCoreState *env = &tricore_cpu->env;
1888 #endif
1890 cpu_synchronize_state(cpu);
1892 info = g_malloc0(sizeof(*info));
1893 info->value = g_malloc0(sizeof(*info->value));
1894 info->value->CPU = cpu->cpu_index;
1895 info->value->current = (cpu == first_cpu);
1896 info->value->halted = cpu->halted;
1897 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1898 info->value->thread_id = cpu->thread_id;
1899 #if defined(TARGET_I386)
1900 info->value->arch = CPU_INFO_ARCH_X86;
1901 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1902 #elif defined(TARGET_PPC)
1903 info->value->arch = CPU_INFO_ARCH_PPC;
1904 info->value->u.ppc.nip = env->nip;
1905 #elif defined(TARGET_SPARC)
1906 info->value->arch = CPU_INFO_ARCH_SPARC;
1907 info->value->u.q_sparc.pc = env->pc;
1908 info->value->u.q_sparc.npc = env->npc;
1909 #elif defined(TARGET_MIPS)
1910 info->value->arch = CPU_INFO_ARCH_MIPS;
1911 info->value->u.q_mips.PC = env->active_tc.PC;
1912 #elif defined(TARGET_TRICORE)
1913 info->value->arch = CPU_INFO_ARCH_TRICORE;
1914 info->value->u.tricore.PC = env->PC;
1915 #else
1916 info->value->arch = CPU_INFO_ARCH_OTHER;
1917 #endif
1919 /* XXX: waiting for the qapi to support GSList */
1920 if (!cur_item) {
1921 head = cur_item = info;
1922 } else {
1923 cur_item->next = info;
1924 cur_item = info;
1928 return head;
1931 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1932 bool has_cpu, int64_t cpu_index, Error **errp)
1934 FILE *f;
1935 uint32_t l;
1936 CPUState *cpu;
1937 uint8_t buf[1024];
1938 int64_t orig_addr = addr, orig_size = size;
1940 if (!has_cpu) {
1941 cpu_index = 0;
1944 cpu = qemu_get_cpu(cpu_index);
1945 if (cpu == NULL) {
1946 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1947 "a CPU number");
1948 return;
1951 f = fopen(filename, "wb");
1952 if (!f) {
1953 error_setg_file_open(errp, errno, filename);
1954 return;
1957 while (size != 0) {
1958 l = sizeof(buf);
1959 if (l > size)
1960 l = size;
1961 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1962 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1963 " specified", orig_addr, orig_size);
1964 goto exit;
1966 if (fwrite(buf, 1, l, f) != l) {
1967 error_setg(errp, QERR_IO_ERROR);
1968 goto exit;
1970 addr += l;
1971 size -= l;
1974 exit:
1975 fclose(f);
1978 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1979 Error **errp)
1981 FILE *f;
1982 uint32_t l;
1983 uint8_t buf[1024];
1985 f = fopen(filename, "wb");
1986 if (!f) {
1987 error_setg_file_open(errp, errno, filename);
1988 return;
1991 while (size != 0) {
1992 l = sizeof(buf);
1993 if (l > size)
1994 l = size;
1995 cpu_physical_memory_read(addr, buf, l);
1996 if (fwrite(buf, 1, l, f) != l) {
1997 error_setg(errp, QERR_IO_ERROR);
1998 goto exit;
2000 addr += l;
2001 size -= l;
2004 exit:
2005 fclose(f);
2008 void qmp_inject_nmi(Error **errp)
2010 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2013 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2015 if (!use_icount) {
2016 return;
2019 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
2020 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2021 if (icount_align_option) {
2022 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
2023 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
2024 } else {
2025 cpu_fprintf(f, "Max guest delay NA\n");
2026 cpu_fprintf(f, "Max guest advance NA\n");