icount: fixed saving/restoring of icount warp timers
[qemu/ar7.git] / cpus.c
blobf99253746ad8cdcb0e5a204297a70aec25f62840
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
29 #include "cpu.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "sysemu/hvf.h"
41 #include "qmp-commands.h"
42 #include "exec/exec-all.h"
44 #include "qemu/thread.h"
45 #include "sysemu/cpus.h"
46 #include "sysemu/qtest.h"
47 #include "qemu/main-loop.h"
48 #include "qemu/bitmap.h"
49 #include "qemu/seqlock.h"
50 #include "tcg.h"
51 #include "qapi-event.h"
52 #include "hw/nmi.h"
53 #include "sysemu/replay.h"
54 #include "hw/boards.h"
56 #ifdef CONFIG_LINUX
58 #include <sys/prctl.h>
60 #ifndef PR_MCE_KILL
61 #define PR_MCE_KILL 33
62 #endif
64 #ifndef PR_MCE_KILL_SET
65 #define PR_MCE_KILL_SET 1
66 #endif
68 #ifndef PR_MCE_KILL_EARLY
69 #define PR_MCE_KILL_EARLY 1
70 #endif
72 #endif /* CONFIG_LINUX */
74 int64_t max_delay;
75 int64_t max_advance;
77 /* vcpu throttling controls */
78 static QEMUTimer *throttle_timer;
79 static unsigned int throttle_percentage;
81 #define CPU_THROTTLE_PCT_MIN 1
82 #define CPU_THROTTLE_PCT_MAX 99
83 #define CPU_THROTTLE_TIMESLICE_NS 10000000
85 bool cpu_is_stopped(CPUState *cpu)
87 return cpu->stopped || !runstate_is_running();
90 static bool cpu_thread_is_idle(CPUState *cpu)
92 if (cpu->stop || cpu->queued_work_first) {
93 return false;
95 if (cpu_is_stopped(cpu)) {
96 return true;
98 if (!cpu->halted || cpu_has_work(cpu) ||
99 kvm_halt_in_kernel()) {
100 return false;
102 return true;
105 static bool all_cpu_threads_idle(void)
107 CPUState *cpu;
109 CPU_FOREACH(cpu) {
110 if (!cpu_thread_is_idle(cpu)) {
111 return false;
114 return true;
117 /***********************************************************/
118 /* guest cycle counter */
/* Protected by TimersState seqlock */

/* Set from the "sleep" option in configure_icount(); defaults to true. */
static bool icount_sleep = true;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
128 typedef struct TimersState {
129 /* Protected by BQL. */
130 int64_t cpu_ticks_prev;
131 int64_t cpu_ticks_offset;
133 /* cpu_clock_offset can be read out of BQL, so protect it with
134 * this lock.
136 QemuSeqLock vm_clock_seqlock;
137 int64_t cpu_clock_offset;
138 int32_t cpu_ticks_enabled;
139 int64_t dummy;
141 /* Compensate for varying guest execution speed. */
142 int64_t qemu_icount_bias;
143 /* Only written by TCG thread */
144 int64_t qemu_icount;
145 /* for adjusting icount */
146 int64_t vm_clock_warp_start;
147 QEMUTimer *icount_rt_timer;
148 QEMUTimer *icount_vm_timer;
149 QEMUTimer *icount_warp_timer;
150 } TimersState;
152 static TimersState timers_state;
153 bool mttcg_enabled;
/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *   - atomic instructions
 *   - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

/*
 * Returns true when every ordering bit in TCG_GUEST_DEFAULT_MO is also
 * set in TCG_TARGET_DEFAULT_MO; false if either macro is undefined.
 */
static bool check_tcg_memory_orders_compatible(void)
{
    bool compatible = false;

#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    compatible = (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#endif
    return compatible;
}
184 static bool default_mttcg_enabled(void)
186 if (use_icount || TCG_OVERSIZED_GUEST) {
187 return false;
188 } else {
189 #ifdef TARGET_SUPPORTS_MTTCG
190 return check_tcg_memory_orders_compatible();
191 #else
192 return false;
193 #endif
197 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
199 const char *t = qemu_opt_get(opts, "thread");
200 if (t) {
201 if (strcmp(t, "multi") == 0) {
202 if (TCG_OVERSIZED_GUEST) {
203 error_setg(errp, "No MTTCG when guest word size > hosts");
204 } else if (use_icount) {
205 error_setg(errp, "No MTTCG when icount is enabled");
206 } else {
207 #ifndef TARGET_SUPPORTS_MTTCG
208 error_report("Guest not yet converted to MTTCG - "
209 "you may get unexpected results");
210 #endif
211 if (!check_tcg_memory_orders_compatible()) {
212 error_report("Guest expects a stronger memory ordering "
213 "than the host provides");
214 error_printf("This may cause strange/hard to debug errors\n");
216 mttcg_enabled = true;
218 } else if (strcmp(t, "single") == 0) {
219 mttcg_enabled = false;
220 } else {
221 error_setg(errp, "Invalid 'thread' setting %s", t);
223 } else {
224 mttcg_enabled = default_mttcg_enabled();
228 /* The current number of executed instructions is based on what we
229 * originally budgeted minus the current state of the decrementing
230 * icount counters in extra/u16.low.
232 static int64_t cpu_get_icount_executed(CPUState *cpu)
234 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
238 * Update the global shared timer_state.qemu_icount to take into
239 * account executed instructions. This is done by the TCG vCPU
240 * thread so the main-loop can see time has moved forward.
242 void cpu_update_icount(CPUState *cpu)
244 int64_t executed = cpu_get_icount_executed(cpu);
245 cpu->icount_budget -= executed;
247 #ifdef CONFIG_ATOMIC64
248 atomic_set__nocheck(&timers_state.qemu_icount,
249 atomic_read__nocheck(&timers_state.qemu_icount) +
250 executed);
251 #else /* FIXME: we need 64bit atomics to do this safely */
252 timers_state.qemu_icount += executed;
253 #endif
256 int64_t cpu_get_icount_raw(void)
258 CPUState *cpu = current_cpu;
260 if (cpu && cpu->running) {
261 if (!cpu->can_do_io) {
262 fprintf(stderr, "Bad icount read\n");
263 exit(1);
265 /* Take into account what has run */
266 cpu_update_icount(cpu);
268 #ifdef CONFIG_ATOMIC64
269 return atomic_read__nocheck(&timers_state.qemu_icount);
270 #else /* FIXME: we need 64bit atomics to do this safely */
271 return timers_state.qemu_icount;
272 #endif
275 /* Return the virtual CPU time, based on the instruction counter. */
276 static int64_t cpu_get_icount_locked(void)
278 int64_t icount = cpu_get_icount_raw();
279 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
282 int64_t cpu_get_icount(void)
284 int64_t icount;
285 unsigned start;
287 do {
288 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
289 icount = cpu_get_icount_locked();
290 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
292 return icount;
295 int64_t cpu_icount_to_ns(int64_t icount)
297 return icount << icount_time_shift;
300 /* return the time elapsed in VM between vm_start and vm_stop. Unless
301 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
302 * counter.
304 * Caller must hold the BQL
306 int64_t cpu_get_ticks(void)
308 int64_t ticks;
310 if (use_icount) {
311 return cpu_get_icount();
314 ticks = timers_state.cpu_ticks_offset;
315 if (timers_state.cpu_ticks_enabled) {
316 ticks += cpu_get_host_ticks();
319 if (timers_state.cpu_ticks_prev > ticks) {
320 /* Note: non increasing ticks may happen if the host uses
321 software suspend */
322 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
323 ticks = timers_state.cpu_ticks_prev;
326 timers_state.cpu_ticks_prev = ticks;
327 return ticks;
330 static int64_t cpu_get_clock_locked(void)
332 int64_t time;
334 time = timers_state.cpu_clock_offset;
335 if (timers_state.cpu_ticks_enabled) {
336 time += get_clock();
339 return time;
342 /* Return the monotonic time elapsed in VM, i.e.,
343 * the time between vm_start and vm_stop
345 int64_t cpu_get_clock(void)
347 int64_t ti;
348 unsigned start;
350 do {
351 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
352 ti = cpu_get_clock_locked();
353 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
355 return ti;
358 /* enable cpu_get_ticks()
359 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
361 void cpu_enable_ticks(void)
363 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
364 seqlock_write_begin(&timers_state.vm_clock_seqlock);
365 if (!timers_state.cpu_ticks_enabled) {
366 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
367 timers_state.cpu_clock_offset -= get_clock();
368 timers_state.cpu_ticks_enabled = 1;
370 seqlock_write_end(&timers_state.vm_clock_seqlock);
373 /* disable cpu_get_ticks() : the clock is stopped. You must not call
374 * cpu_get_ticks() after that.
375 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
377 void cpu_disable_ticks(void)
379 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
380 seqlock_write_begin(&timers_state.vm_clock_seqlock);
381 if (timers_state.cpu_ticks_enabled) {
382 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
383 timers_state.cpu_clock_offset = cpu_get_clock_locked();
384 timers_state.cpu_ticks_enabled = 0;
386 seqlock_write_end(&timers_state.vm_clock_seqlock);
389 /* Correlation between real and virtual time is always going to be
390 fairly approximate, so ignore small variation.
391 When the guest is idle real and virtual time will be aligned in
392 the IO wait loop. */
393 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
395 static void icount_adjust(void)
397 int64_t cur_time;
398 int64_t cur_icount;
399 int64_t delta;
401 /* Protected by TimersState mutex. */
402 static int64_t last_delta;
404 /* If the VM is not running, then do nothing. */
405 if (!runstate_is_running()) {
406 return;
409 seqlock_write_begin(&timers_state.vm_clock_seqlock);
410 cur_time = cpu_get_clock_locked();
411 cur_icount = cpu_get_icount_locked();
413 delta = cur_icount - cur_time;
414 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
415 if (delta > 0
416 && last_delta + ICOUNT_WOBBLE < delta * 2
417 && icount_time_shift > 0) {
418 /* The guest is getting too far ahead. Slow time down. */
419 icount_time_shift--;
421 if (delta < 0
422 && last_delta - ICOUNT_WOBBLE > delta * 2
423 && icount_time_shift < MAX_ICOUNT_SHIFT) {
424 /* The guest is getting too far behind. Speed time up. */
425 icount_time_shift++;
427 last_delta = delta;
428 timers_state.qemu_icount_bias = cur_icount
429 - (timers_state.qemu_icount << icount_time_shift);
430 seqlock_write_end(&timers_state.vm_clock_seqlock);
433 static void icount_adjust_rt(void *opaque)
435 timer_mod(timers_state.icount_rt_timer,
436 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
437 icount_adjust();
440 static void icount_adjust_vm(void *opaque)
442 timer_mod(timers_state.icount_vm_timer,
443 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
444 NANOSECONDS_PER_SECOND / 10);
445 icount_adjust();
448 static int64_t qemu_icount_round(int64_t count)
450 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
453 static void icount_warp_rt(void)
455 unsigned seq;
456 int64_t warp_start;
458 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
459 * changes from -1 to another value, so the race here is okay.
461 do {
462 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
463 warp_start = timers_state.vm_clock_warp_start;
464 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
466 if (warp_start == -1) {
467 return;
470 seqlock_write_begin(&timers_state.vm_clock_seqlock);
471 if (runstate_is_running()) {
472 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
473 cpu_get_clock_locked());
474 int64_t warp_delta;
476 warp_delta = clock - timers_state.vm_clock_warp_start;
477 if (use_icount == 2) {
479 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
480 * far ahead of real time.
482 int64_t cur_icount = cpu_get_icount_locked();
483 int64_t delta = clock - cur_icount;
484 warp_delta = MIN(warp_delta, delta);
486 timers_state.qemu_icount_bias += warp_delta;
488 timers_state.vm_clock_warp_start = -1;
489 seqlock_write_end(&timers_state.vm_clock_seqlock);
491 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
492 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
/* Callback of icount_warp_timer: account the warp. */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
504 void qtest_clock_warp(int64_t dest)
506 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
507 AioContext *aio_context;
508 assert(qtest_enabled());
509 aio_context = qemu_get_aio_context();
510 while (clock < dest) {
511 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
512 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
514 seqlock_write_begin(&timers_state.vm_clock_seqlock);
515 timers_state.qemu_icount_bias += warp;
516 seqlock_write_end(&timers_state.vm_clock_seqlock);
518 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
519 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
520 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
522 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
525 void qemu_start_warp_timer(void)
527 int64_t clock;
528 int64_t deadline;
530 if (!use_icount) {
531 return;
534 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
535 * do not fire, so computing the deadline does not make sense.
537 if (!runstate_is_running()) {
538 return;
541 /* warp clock deterministically in record/replay mode */
542 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
543 return;
546 if (!all_cpu_threads_idle()) {
547 return;
550 if (qtest_enabled()) {
551 /* When testing, qtest commands advance icount. */
552 return;
555 /* We want to use the earliest deadline from ALL vm_clocks */
556 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
557 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
558 if (deadline < 0) {
559 static bool notified;
560 if (!icount_sleep && !notified) {
561 warn_report("icount sleep disabled and no active timers");
562 notified = true;
564 return;
567 if (deadline > 0) {
569 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
570 * sleep. Otherwise, the CPU might be waiting for a future timer
571 * interrupt to wake it up, but the interrupt never comes because
572 * the vCPU isn't running any insns and thus doesn't advance the
573 * QEMU_CLOCK_VIRTUAL.
575 if (!icount_sleep) {
577 * We never let VCPUs sleep in no sleep icount mode.
578 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
579 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
580 * It is useful when we want a deterministic execution time,
581 * isolated from host latencies.
583 seqlock_write_begin(&timers_state.vm_clock_seqlock);
584 timers_state.qemu_icount_bias += deadline;
585 seqlock_write_end(&timers_state.vm_clock_seqlock);
586 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
587 } else {
589 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
590 * "real" time, (related to the time left until the next event) has
591 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
592 * This avoids that the warps are visible externally; for example,
593 * you will not be sending network packets continuously instead of
594 * every 100ms.
596 seqlock_write_begin(&timers_state.vm_clock_seqlock);
597 if (timers_state.vm_clock_warp_start == -1
598 || timers_state.vm_clock_warp_start > clock) {
599 timers_state.vm_clock_warp_start = clock;
601 seqlock_write_end(&timers_state.vm_clock_seqlock);
602 timer_mod_anticipate(timers_state.icount_warp_timer,
603 clock + deadline);
605 } else if (deadline == 0) {
606 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
610 static void qemu_account_warp_timer(void)
612 if (!use_icount || !icount_sleep) {
613 return;
616 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
617 * do not fire, so computing the deadline does not make sense.
619 if (!runstate_is_running()) {
620 return;
623 /* warp clock deterministically in record/replay mode */
624 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
625 return;
628 timer_del(timers_state.icount_warp_timer);
629 icount_warp_rt();
632 static bool icount_state_needed(void *opaque)
634 return use_icount;
637 static bool warp_timer_state_needed(void *opaque)
639 TimersState *s = opaque;
640 return s->icount_warp_timer != NULL;
643 static bool adjust_timers_state_needed(void *opaque)
645 TimersState *s = opaque;
646 return s->icount_rt_timer != NULL;
650 * Subsection for warp timer migration is optional, because may not be created
652 static const VMStateDescription icount_vmstate_warp_timer = {
653 .name = "timer/icount/warp_timer",
654 .version_id = 1,
655 .minimum_version_id = 1,
656 .needed = warp_timer_state_needed,
657 .fields = (VMStateField[]) {
658 VMSTATE_INT64(vm_clock_warp_start, TimersState),
659 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
660 VMSTATE_END_OF_LIST()
664 static const VMStateDescription icount_vmstate_adjust_timers = {
665 .name = "timer/icount/timers",
666 .version_id = 1,
667 .minimum_version_id = 1,
668 .needed = adjust_timers_state_needed,
669 .fields = (VMStateField[]) {
670 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
671 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
672 VMSTATE_END_OF_LIST()
677 * This is a subsection for icount migration.
679 static const VMStateDescription icount_vmstate_timers = {
680 .name = "timer/icount",
681 .version_id = 1,
682 .minimum_version_id = 1,
683 .needed = icount_state_needed,
684 .fields = (VMStateField[]) {
685 VMSTATE_INT64(qemu_icount_bias, TimersState),
686 VMSTATE_INT64(qemu_icount, TimersState),
687 VMSTATE_END_OF_LIST()
689 .subsections = (const VMStateDescription*[]) {
690 &icount_vmstate_warp_timer,
691 &icount_vmstate_adjust_timers,
692 NULL
696 static const VMStateDescription vmstate_timers = {
697 .name = "timer",
698 .version_id = 2,
699 .minimum_version_id = 1,
700 .fields = (VMStateField[]) {
701 VMSTATE_INT64(cpu_ticks_offset, TimersState),
702 VMSTATE_INT64(dummy, TimersState),
703 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
704 VMSTATE_END_OF_LIST()
706 .subsections = (const VMStateDescription*[]) {
707 &icount_vmstate_timers,
708 NULL
712 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
714 double pct;
715 double throttle_ratio;
716 long sleeptime_ns;
718 if (!cpu_throttle_get_percentage()) {
719 return;
722 pct = (double)cpu_throttle_get_percentage()/100;
723 throttle_ratio = pct / (1 - pct);
724 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
726 qemu_mutex_unlock_iothread();
727 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
728 qemu_mutex_lock_iothread();
729 atomic_set(&cpu->throttle_thread_scheduled, 0);
732 static void cpu_throttle_timer_tick(void *opaque)
734 CPUState *cpu;
735 double pct;
737 /* Stop the timer if needed */
738 if (!cpu_throttle_get_percentage()) {
739 return;
741 CPU_FOREACH(cpu) {
742 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
743 async_run_on_cpu(cpu, cpu_throttle_thread,
744 RUN_ON_CPU_NULL);
748 pct = (double)cpu_throttle_get_percentage()/100;
749 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
750 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
753 void cpu_throttle_set(int new_throttle_pct)
755 /* Ensure throttle percentage is within valid range */
756 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
757 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
759 atomic_set(&throttle_percentage, new_throttle_pct);
761 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
762 CPU_THROTTLE_TIMESLICE_NS);
765 void cpu_throttle_stop(void)
767 atomic_set(&throttle_percentage, 0);
/* True while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
775 int cpu_throttle_get_percentage(void)
777 return atomic_read(&throttle_percentage);
780 void cpu_ticks_init(void)
782 seqlock_init(&timers_state.vm_clock_seqlock);
783 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
784 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
785 cpu_throttle_timer_tick, NULL);
788 void configure_icount(QemuOpts *opts, Error **errp)
790 const char *option;
791 char *rem_str = NULL;
793 option = qemu_opt_get(opts, "shift");
794 if (!option) {
795 if (qemu_opt_get(opts, "align") != NULL) {
796 error_setg(errp, "Please specify shift option when using align");
798 return;
801 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
802 if (icount_sleep) {
803 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
804 icount_timer_cb, NULL);
807 icount_align_option = qemu_opt_get_bool(opts, "align", false);
809 if (icount_align_option && !icount_sleep) {
810 error_setg(errp, "align=on and sleep=off are incompatible");
812 if (strcmp(option, "auto") != 0) {
813 errno = 0;
814 icount_time_shift = strtol(option, &rem_str, 0);
815 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
816 error_setg(errp, "icount: Invalid shift value");
818 use_icount = 1;
819 return;
820 } else if (icount_align_option) {
821 error_setg(errp, "shift=auto and align=on are incompatible");
822 } else if (!icount_sleep) {
823 error_setg(errp, "shift=auto and sleep=off are incompatible");
826 use_icount = 2;
828 /* 125MIPS seems a reasonable initial guess at the guest speed.
829 It will be corrected fairly quickly anyway. */
830 icount_time_shift = 3;
832 /* Have both realtime and virtual time triggers for speed adjustment.
833 The realtime trigger catches emulated time passing too slowly,
834 the virtual time trigger catches emulated time passing too fast.
835 Realtime triggers occur even when idle, so use them less frequently
836 than VM triggers. */
837 timers_state.vm_clock_warp_start = -1;
838 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
839 icount_adjust_rt, NULL);
840 timer_mod(timers_state.icount_rt_timer,
841 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
842 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
843 icount_adjust_vm, NULL);
844 timer_mod(timers_state.icount_vm_timer,
845 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
846 NANOSECONDS_PER_SECOND / 10);
849 /***********************************************************/
850 /* TCG vCPU kick timer
852 * The kick timer is responsible for moving single threaded vCPU
853 * emulation on to the next vCPU. If more than one vCPU is running a
854 * timer event will force a cpu->exit so the next vCPU can get
855 * scheduled.
857 * The timer is removed if all vCPUs are idle and restarted again once
858 * idleness is complete.
861 static QEMUTimer *tcg_kick_vcpu_timer;
862 static CPUState *tcg_current_rr_cpu;
864 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
866 static inline int64_t qemu_tcg_next_kick(void)
868 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
871 /* Kick the currently round-robin scheduled vCPU */
872 static void qemu_cpu_kick_rr_cpu(void)
874 CPUState *cpu;
875 do {
876 cpu = atomic_mb_read(&tcg_current_rr_cpu);
877 if (cpu) {
878 cpu_exit(cpu);
880 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
883 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
887 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
889 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
890 qemu_notify_event();
891 return;
894 if (!qemu_in_vcpu_thread() && first_cpu) {
895 /* qemu_cpu_kick is not enough to kick a halted CPU out of
896 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
897 * causes cpu_thread_is_idle to return false. This way,
898 * handle_icount_deadline can run.
900 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
904 static void kick_tcg_thread(void *opaque)
906 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
907 qemu_cpu_kick_rr_cpu();
910 static void start_tcg_kick_timer(void)
912 if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
913 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
914 kick_tcg_thread, NULL);
915 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
919 static void stop_tcg_kick_timer(void)
921 if (tcg_kick_vcpu_timer) {
922 timer_del(tcg_kick_vcpu_timer);
923 tcg_kick_vcpu_timer = NULL;
927 /***********************************************************/
928 void hw_error(const char *fmt, ...)
930 va_list ap;
931 CPUState *cpu;
933 va_start(ap, fmt);
934 fprintf(stderr, "qemu: hardware error: ");
935 vfprintf(stderr, fmt, ap);
936 fprintf(stderr, "\n");
937 CPU_FOREACH(cpu) {
938 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
939 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
941 va_end(ap);
942 abort();
945 void cpu_synchronize_all_states(void)
947 CPUState *cpu;
949 CPU_FOREACH(cpu) {
950 cpu_synchronize_state(cpu);
951 /* TODO: move to cpu_synchronize_state() */
952 if (hvf_enabled()) {
953 hvf_cpu_synchronize_state(cpu);
958 void cpu_synchronize_all_post_reset(void)
960 CPUState *cpu;
962 CPU_FOREACH(cpu) {
963 cpu_synchronize_post_reset(cpu);
964 /* TODO: move to cpu_synchronize_post_reset() */
965 if (hvf_enabled()) {
966 hvf_cpu_synchronize_post_reset(cpu);
971 void cpu_synchronize_all_post_init(void)
973 CPUState *cpu;
975 CPU_FOREACH(cpu) {
976 cpu_synchronize_post_init(cpu);
977 /* TODO: move to cpu_synchronize_post_init() */
978 if (hvf_enabled()) {
979 hvf_cpu_synchronize_post_init(cpu);
984 void cpu_synchronize_all_pre_loadvm(void)
986 CPUState *cpu;
988 CPU_FOREACH(cpu) {
989 cpu_synchronize_pre_loadvm(cpu);
993 static int do_vm_stop(RunState state)
995 int ret = 0;
997 if (runstate_is_running()) {
998 cpu_disable_ticks();
999 pause_all_vcpus();
1000 runstate_set(state);
1001 vm_state_notify(0, state);
1002 qapi_event_send_stop(&error_abort);
1005 bdrv_drain_all();
1006 replay_disable_events();
1007 ret = bdrv_flush_all();
1009 return ret;
1012 static bool cpu_can_run(CPUState *cpu)
1014 if (cpu->stop) {
1015 return false;
1017 if (cpu_is_stopped(cpu)) {
1018 return false;
1020 return true;
1023 static void cpu_handle_guest_debug(CPUState *cpu)
1025 gdb_set_stop_cpu(cpu);
1026 qemu_system_debug_request();
1027 cpu->stopped = true;
1030 #ifdef CONFIG_LINUX
1031 static void sigbus_reraise(void)
1033 sigset_t set;
1034 struct sigaction action;
1036 memset(&action, 0, sizeof(action));
1037 action.sa_handler = SIG_DFL;
1038 if (!sigaction(SIGBUS, &action, NULL)) {
1039 raise(SIGBUS);
1040 sigemptyset(&set);
1041 sigaddset(&set, SIGBUS);
1042 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1044 perror("Failed to re-raise SIGBUS!\n");
1045 abort();
1048 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1050 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1051 sigbus_reraise();
1054 if (current_cpu) {
1055 /* Called asynchronously in VCPU thread. */
1056 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1057 sigbus_reraise();
1059 } else {
1060 /* Called synchronously (via signalfd) in main thread. */
1061 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1062 sigbus_reraise();
1067 static void qemu_init_sigbus(void)
1069 struct sigaction action;
1071 memset(&action, 0, sizeof(action));
1072 action.sa_flags = SA_SIGINFO;
1073 action.sa_sigaction = sigbus_handler;
1074 sigaction(SIGBUS, &action, NULL);
1076 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1078 #else /* !CONFIG_LINUX */
/* No SIGBUS setup is done on non-Linux hosts. */
static void qemu_init_sigbus(void)
{
}
1082 #endif /* !CONFIG_LINUX */
1084 static QemuMutex qemu_global_mutex;
1086 static QemuThread io_thread;
1088 /* cpu creation */
1089 static QemuCond qemu_cpu_cond;
1090 /* system init */
1091 static QemuCond qemu_pause_cond;
1093 void qemu_init_cpu_loop(void)
1095 qemu_init_sigbus();
1096 qemu_cond_init(&qemu_cpu_cond);
1097 qemu_cond_init(&qemu_pause_cond);
1098 qemu_mutex_init(&qemu_global_mutex);
1100 qemu_thread_get_self(&io_thread);
1103 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1105 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1108 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1110 if (kvm_destroy_vcpu(cpu) < 0) {
1111 error_report("kvm_destroy_vcpu failed");
1112 exit(EXIT_FAILURE);
1116 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1120 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1122 g_assert(qemu_cpu_is_self(cpu));
1123 cpu->stop = false;
1124 cpu->stopped = true;
1125 if (exit) {
1126 cpu_exit(cpu);
1128 qemu_cond_broadcast(&qemu_pause_cond);
1131 static void qemu_wait_io_event_common(CPUState *cpu)
1133 atomic_mb_set(&cpu->thread_kicked, false);
1134 if (cpu->stop) {
1135 qemu_cpu_stop(cpu, false);
1137 process_queued_cpu_work(cpu);
1140 static bool qemu_tcg_should_sleep(CPUState *cpu)
1142 if (mttcg_enabled) {
1143 return cpu_thread_is_idle(cpu);
1144 } else {
1145 return all_cpu_threads_idle();
1149 static void qemu_tcg_wait_io_event(CPUState *cpu)
1151 while (qemu_tcg_should_sleep(cpu)) {
1152 stop_tcg_kick_timer();
1153 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1156 start_tcg_kick_timer();
1158 qemu_wait_io_event_common(cpu);
1161 static void qemu_kvm_wait_io_event(CPUState *cpu)
1163 while (cpu_thread_is_idle(cpu)) {
1164 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1167 qemu_wait_io_event_common(cpu);
1170 static void qemu_hvf_wait_io_event(CPUState *cpu)
1172 while (cpu_thread_is_idle(cpu)) {
1173 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1175 qemu_wait_io_event_common(cpu);
1178 static void *qemu_kvm_cpu_thread_fn(void *arg)
1180 CPUState *cpu = arg;
1181 int r;
1183 rcu_register_thread();
1185 qemu_mutex_lock_iothread();
1186 qemu_thread_get_self(cpu->thread);
1187 cpu->thread_id = qemu_get_thread_id();
1188 cpu->can_do_io = 1;
1189 current_cpu = cpu;
1191 r = kvm_init_vcpu(cpu);
1192 if (r < 0) {
1193 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1194 exit(1);
1197 kvm_init_cpu_signals(cpu);
1199 /* signal CPU creation */
1200 cpu->created = true;
1201 qemu_cond_signal(&qemu_cpu_cond);
1203 do {
1204 if (cpu_can_run(cpu)) {
1205 r = kvm_cpu_exec(cpu);
1206 if (r == EXCP_DEBUG) {
1207 cpu_handle_guest_debug(cpu);
1210 qemu_kvm_wait_io_event(cpu);
1211 } while (!cpu->unplug || cpu_can_run(cpu));
1213 qemu_kvm_destroy_vcpu(cpu);
1214 cpu->created = false;
1215 qemu_cond_signal(&qemu_cpu_cond);
1216 qemu_mutex_unlock_iothread();
1217 return NULL;
1220 static void *qemu_dummy_cpu_thread_fn(void *arg)
1222 #ifdef _WIN32
1223 fprintf(stderr, "qtest is not supported under Windows\n");
1224 exit(1);
1225 #else
1226 CPUState *cpu = arg;
1227 sigset_t waitset;
1228 int r;
1230 rcu_register_thread();
1232 qemu_mutex_lock_iothread();
1233 qemu_thread_get_self(cpu->thread);
1234 cpu->thread_id = qemu_get_thread_id();
1235 cpu->can_do_io = 1;
1236 current_cpu = cpu;
1238 sigemptyset(&waitset);
1239 sigaddset(&waitset, SIG_IPI);
1241 /* signal CPU creation */
1242 cpu->created = true;
1243 qemu_cond_signal(&qemu_cpu_cond);
1245 while (1) {
1246 qemu_mutex_unlock_iothread();
1247 do {
1248 int sig;
1249 r = sigwait(&waitset, &sig);
1250 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1251 if (r == -1) {
1252 perror("sigwait");
1253 exit(1);
1255 qemu_mutex_lock_iothread();
1256 qemu_wait_io_event_common(cpu);
1259 return NULL;
1260 #endif
1263 static int64_t tcg_get_icount_limit(void)
1265 int64_t deadline;
1267 if (replay_mode != REPLAY_MODE_PLAY) {
1268 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1270 /* Maintain prior (possibly buggy) behaviour where if no deadline
1271 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1272 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1273 * nanoseconds.
1275 if ((deadline < 0) || (deadline > INT32_MAX)) {
1276 deadline = INT32_MAX;
1279 return qemu_icount_round(deadline);
1280 } else {
1281 return replay_get_instructions();
1285 static void handle_icount_deadline(void)
1287 assert(qemu_in_vcpu_thread());
1288 if (use_icount) {
1289 int64_t deadline =
1290 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1292 if (deadline == 0) {
1293 /* Wake up other AioContexts. */
1294 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1295 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1300 static void prepare_icount_for_run(CPUState *cpu)
1302 if (use_icount) {
1303 int insns_left;
1305 /* These should always be cleared by process_icount_data after
1306 * each vCPU execution. However u16.high can be raised
1307 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1309 g_assert(cpu->icount_decr.u16.low == 0);
1310 g_assert(cpu->icount_extra == 0);
1312 cpu->icount_budget = tcg_get_icount_limit();
1313 insns_left = MIN(0xffff, cpu->icount_budget);
1314 cpu->icount_decr.u16.low = insns_left;
1315 cpu->icount_extra = cpu->icount_budget - insns_left;
1319 static void process_icount_data(CPUState *cpu)
1321 if (use_icount) {
1322 /* Account for executed instructions */
1323 cpu_update_icount(cpu);
1325 /* Reset the counters */
1326 cpu->icount_decr.u16.low = 0;
1327 cpu->icount_extra = 0;
1328 cpu->icount_budget = 0;
1330 replay_account_executed_instructions();
1335 static int tcg_cpu_exec(CPUState *cpu)
1337 int ret;
1338 #ifdef CONFIG_PROFILER
1339 int64_t ti;
1340 #endif
1342 #ifdef CONFIG_PROFILER
1343 ti = profile_getclock();
1344 #endif
1345 qemu_mutex_unlock_iothread();
1346 cpu_exec_start(cpu);
1347 ret = cpu_exec(cpu);
1348 cpu_exec_end(cpu);
1349 qemu_mutex_lock_iothread();
1350 #ifdef CONFIG_PROFILER
1351 tcg_time += profile_getclock() - ti;
1352 #endif
1353 return ret;
1356 /* Destroy any remaining vCPUs which have been unplugged and have
1357 * finished running
1359 static void deal_with_unplugged_cpus(void)
1361 CPUState *cpu;
1363 CPU_FOREACH(cpu) {
1364 if (cpu->unplug && !cpu_can_run(cpu)) {
1365 qemu_tcg_destroy_vcpu(cpu);
1366 cpu->created = false;
1367 qemu_cond_signal(&qemu_cpu_cond);
1368 break;
1373 /* Single-threaded TCG
1375 * In the single-threaded case each vCPU is simulated in turn. If
1376 * there is more than a single vCPU we create a simple timer to kick
1377 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1378 * This is done explicitly rather than relying on side-effects
1379 * elsewhere.
1382 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1384 CPUState *cpu = arg;
1386 rcu_register_thread();
1387 tcg_register_thread();
1389 qemu_mutex_lock_iothread();
1390 qemu_thread_get_self(cpu->thread);
1392 CPU_FOREACH(cpu) {
1393 cpu->thread_id = qemu_get_thread_id();
1394 cpu->created = true;
1395 cpu->can_do_io = 1;
1397 qemu_cond_signal(&qemu_cpu_cond);
1399 /* wait for initial kick-off after machine start */
1400 while (first_cpu->stopped) {
1401 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1403 /* process any pending work */
1404 CPU_FOREACH(cpu) {
1405 current_cpu = cpu;
1406 qemu_wait_io_event_common(cpu);
1410 start_tcg_kick_timer();
1412 cpu = first_cpu;
1414 /* process any pending work */
1415 cpu->exit_request = 1;
1417 while (1) {
1418 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1419 qemu_account_warp_timer();
1421 /* Run the timers here. This is much more efficient than
1422 * waking up the I/O thread and waiting for completion.
1424 handle_icount_deadline();
1426 if (!cpu) {
1427 cpu = first_cpu;
1430 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1432 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1433 current_cpu = cpu;
1435 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1436 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1438 if (cpu_can_run(cpu)) {
1439 int r;
1441 prepare_icount_for_run(cpu);
1443 r = tcg_cpu_exec(cpu);
1445 process_icount_data(cpu);
1447 if (r == EXCP_DEBUG) {
1448 cpu_handle_guest_debug(cpu);
1449 break;
1450 } else if (r == EXCP_ATOMIC) {
1451 qemu_mutex_unlock_iothread();
1452 cpu_exec_step_atomic(cpu);
1453 qemu_mutex_lock_iothread();
1454 break;
1456 } else if (cpu->stop) {
1457 if (cpu->unplug) {
1458 cpu = CPU_NEXT(cpu);
1460 break;
1463 cpu = CPU_NEXT(cpu);
1464 } /* while (cpu && !cpu->exit_request).. */
1466 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1467 atomic_set(&tcg_current_rr_cpu, NULL);
1469 if (cpu && cpu->exit_request) {
1470 atomic_mb_set(&cpu->exit_request, 0);
1473 qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
1474 deal_with_unplugged_cpus();
1477 return NULL;
1480 static void *qemu_hax_cpu_thread_fn(void *arg)
1482 CPUState *cpu = arg;
1483 int r;
1485 qemu_mutex_lock_iothread();
1486 qemu_thread_get_self(cpu->thread);
1488 cpu->thread_id = qemu_get_thread_id();
1489 cpu->created = true;
1490 cpu->halted = 0;
1491 current_cpu = cpu;
1493 hax_init_vcpu(cpu);
1494 qemu_cond_signal(&qemu_cpu_cond);
1496 while (1) {
1497 if (cpu_can_run(cpu)) {
1498 r = hax_smp_cpu_exec(cpu);
1499 if (r == EXCP_DEBUG) {
1500 cpu_handle_guest_debug(cpu);
1504 while (cpu_thread_is_idle(cpu)) {
1505 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1507 #ifdef _WIN32
1508 SleepEx(0, TRUE);
1509 #endif
1510 qemu_wait_io_event_common(cpu);
1512 return NULL;
1515 /* The HVF-specific vCPU thread function. This one should only run when the host
1516 * CPU supports the VMX "unrestricted guest" feature. */
1517 static void *qemu_hvf_cpu_thread_fn(void *arg)
1519 CPUState *cpu = arg;
1521 int r;
1523 assert(hvf_enabled());
1525 rcu_register_thread();
1527 qemu_mutex_lock_iothread();
1528 qemu_thread_get_self(cpu->thread);
1530 cpu->thread_id = qemu_get_thread_id();
1531 cpu->can_do_io = 1;
1532 current_cpu = cpu;
1534 hvf_init_vcpu(cpu);
1536 /* signal CPU creation */
1537 cpu->created = true;
1538 qemu_cond_signal(&qemu_cpu_cond);
1540 do {
1541 if (cpu_can_run(cpu)) {
1542 r = hvf_vcpu_exec(cpu);
1543 if (r == EXCP_DEBUG) {
1544 cpu_handle_guest_debug(cpu);
1547 qemu_hvf_wait_io_event(cpu);
1548 } while (!cpu->unplug || cpu_can_run(cpu));
1550 hvf_vcpu_destroy(cpu);
1551 cpu->created = false;
1552 qemu_cond_signal(&qemu_cpu_cond);
1553 qemu_mutex_unlock_iothread();
1554 return NULL;
#ifdef _WIN32
/*
 * Intentionally empty APC routine.  qemu_cpu_kick_thread() queues it on
 * a vCPU thread with QueueUserAPC(); the routine itself does nothing.
 */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif
1563 /* Multi-threaded TCG
1565 * In the multi-threaded case each vCPU has its own thread. The TLS
1566 * variable current_cpu can be used deep in the code to find the
1567 * current CPUState for a given thread.
1570 static void *qemu_tcg_cpu_thread_fn(void *arg)
1572 CPUState *cpu = arg;
1574 g_assert(!use_icount);
1576 rcu_register_thread();
1577 tcg_register_thread();
1579 qemu_mutex_lock_iothread();
1580 qemu_thread_get_self(cpu->thread);
1582 cpu->thread_id = qemu_get_thread_id();
1583 cpu->created = true;
1584 cpu->can_do_io = 1;
1585 current_cpu = cpu;
1586 qemu_cond_signal(&qemu_cpu_cond);
1588 /* process any pending work */
1589 cpu->exit_request = 1;
1591 while (1) {
1592 if (cpu_can_run(cpu)) {
1593 int r;
1594 r = tcg_cpu_exec(cpu);
1595 switch (r) {
1596 case EXCP_DEBUG:
1597 cpu_handle_guest_debug(cpu);
1598 break;
1599 case EXCP_HALTED:
1600 /* during start-up the vCPU is reset and the thread is
1601 * kicked several times. If we don't ensure we go back
1602 * to sleep in the halted state we won't cleanly
1603 * start-up when the vCPU is enabled.
1605 * cpu->halted should ensure we sleep in wait_io_event
1607 g_assert(cpu->halted);
1608 break;
1609 case EXCP_ATOMIC:
1610 qemu_mutex_unlock_iothread();
1611 cpu_exec_step_atomic(cpu);
1612 qemu_mutex_lock_iothread();
1613 default:
1614 /* Ignore everything else? */
1615 break;
1617 } else if (cpu->unplug) {
1618 qemu_tcg_destroy_vcpu(cpu);
1619 cpu->created = false;
1620 qemu_cond_signal(&qemu_cpu_cond);
1621 qemu_mutex_unlock_iothread();
1622 return NULL;
1625 atomic_mb_set(&cpu->exit_request, 0);
1626 qemu_tcg_wait_io_event(cpu);
1629 return NULL;
1632 static void qemu_cpu_kick_thread(CPUState *cpu)
1634 #ifndef _WIN32
1635 int err;
1637 if (cpu->thread_kicked) {
1638 return;
1640 cpu->thread_kicked = true;
1641 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1642 if (err) {
1643 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1644 exit(1);
1646 #else /* _WIN32 */
1647 if (!qemu_cpu_is_self(cpu)) {
1648 if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1649 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1650 __func__, GetLastError());
1651 exit(1);
1654 #endif
1657 void qemu_cpu_kick(CPUState *cpu)
1659 qemu_cond_broadcast(cpu->halt_cond);
1660 if (tcg_enabled()) {
1661 cpu_exit(cpu);
1662 /* NOP unless doing single-thread RR */
1663 qemu_cpu_kick_rr_cpu();
1664 } else {
1665 if (hax_enabled()) {
1667 * FIXME: race condition with the exit_request check in
1668 * hax_vcpu_hax_exec
1670 cpu->exit_request = 1;
1672 qemu_cpu_kick_thread(cpu);
1676 void qemu_cpu_kick_self(void)
1678 assert(current_cpu);
1679 qemu_cpu_kick_thread(current_cpu);
1682 bool qemu_cpu_is_self(CPUState *cpu)
1684 return qemu_thread_is_self(cpu->thread);
1687 bool qemu_in_vcpu_thread(void)
1689 return current_cpu && qemu_cpu_is_self(current_cpu);
/*
 * Per-thread flag maintained by qemu_mutex_lock_iothread() and
 * qemu_mutex_unlock_iothread().
 */
static __thread bool iothread_locked = false;

/* Return true when the calling thread has taken the iothread lock. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1699 void qemu_mutex_lock_iothread(void)
1701 g_assert(!qemu_mutex_iothread_locked());
1702 qemu_mutex_lock(&qemu_global_mutex);
1703 iothread_locked = true;
1706 void qemu_mutex_unlock_iothread(void)
1708 g_assert(qemu_mutex_iothread_locked());
1709 iothread_locked = false;
1710 qemu_mutex_unlock(&qemu_global_mutex);
1713 static bool all_vcpus_paused(void)
1715 CPUState *cpu;
1717 CPU_FOREACH(cpu) {
1718 if (!cpu->stopped) {
1719 return false;
1723 return true;
1726 void pause_all_vcpus(void)
1728 CPUState *cpu;
1730 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1731 CPU_FOREACH(cpu) {
1732 if (qemu_cpu_is_self(cpu)) {
1733 qemu_cpu_stop(cpu, true);
1734 } else {
1735 cpu->stop = true;
1736 qemu_cpu_kick(cpu);
1740 while (!all_vcpus_paused()) {
1741 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1742 CPU_FOREACH(cpu) {
1743 qemu_cpu_kick(cpu);
1748 void cpu_resume(CPUState *cpu)
1750 cpu->stop = false;
1751 cpu->stopped = false;
1752 qemu_cpu_kick(cpu);
1755 void resume_all_vcpus(void)
1757 CPUState *cpu;
1759 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1760 CPU_FOREACH(cpu) {
1761 cpu_resume(cpu);
1765 void cpu_remove(CPUState *cpu)
1767 cpu->stop = true;
1768 cpu->unplug = true;
1769 qemu_cpu_kick(cpu);
1772 void cpu_remove_sync(CPUState *cpu)
1774 cpu_remove(cpu);
1775 while (cpu->created) {
1776 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1780 /* For temporary buffers for forming a name */
1781 #define VCPU_THREAD_NAME_SIZE 16
1783 static void qemu_tcg_init_vcpu(CPUState *cpu)
1785 char thread_name[VCPU_THREAD_NAME_SIZE];
1786 static QemuCond *single_tcg_halt_cond;
1787 static QemuThread *single_tcg_cpu_thread;
1788 static int tcg_region_inited;
1791 * Initialize TCG regions--once. Now is a good time, because:
1792 * (1) TCG's init context, prologue and target globals have been set up.
1793 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1794 * -accel flag is processed, so the check doesn't work then).
1796 if (!tcg_region_inited) {
1797 tcg_region_inited = 1;
1798 tcg_region_init();
1801 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1802 cpu->thread = g_malloc0(sizeof(QemuThread));
1803 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1804 qemu_cond_init(cpu->halt_cond);
1806 if (qemu_tcg_mttcg_enabled()) {
1807 /* create a thread per vCPU with TCG (MTTCG) */
1808 parallel_cpus = true;
1809 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1810 cpu->cpu_index);
1812 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1813 cpu, QEMU_THREAD_JOINABLE);
1815 } else {
1816 /* share a single thread for all cpus with TCG */
1817 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1818 qemu_thread_create(cpu->thread, thread_name,
1819 qemu_tcg_rr_cpu_thread_fn,
1820 cpu, QEMU_THREAD_JOINABLE);
1822 single_tcg_halt_cond = cpu->halt_cond;
1823 single_tcg_cpu_thread = cpu->thread;
1825 #ifdef _WIN32
1826 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1827 #endif
1828 while (!cpu->created) {
1829 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1831 } else {
1832 /* For non-MTTCG cases we share the thread */
1833 cpu->thread = single_tcg_cpu_thread;
1834 cpu->halt_cond = single_tcg_halt_cond;
1838 static void qemu_hax_start_vcpu(CPUState *cpu)
1840 char thread_name[VCPU_THREAD_NAME_SIZE];
1842 cpu->thread = g_malloc0(sizeof(QemuThread));
1843 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1844 qemu_cond_init(cpu->halt_cond);
1846 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1847 cpu->cpu_index);
1848 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1849 cpu, QEMU_THREAD_JOINABLE);
1850 #ifdef _WIN32
1851 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1852 #endif
1853 while (!cpu->created) {
1854 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1858 static void qemu_kvm_start_vcpu(CPUState *cpu)
1860 char thread_name[VCPU_THREAD_NAME_SIZE];
1862 cpu->thread = g_malloc0(sizeof(QemuThread));
1863 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1864 qemu_cond_init(cpu->halt_cond);
1865 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1866 cpu->cpu_index);
1867 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1868 cpu, QEMU_THREAD_JOINABLE);
1869 while (!cpu->created) {
1870 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1874 static void qemu_hvf_start_vcpu(CPUState *cpu)
1876 char thread_name[VCPU_THREAD_NAME_SIZE];
1878 /* HVF currently does not support TCG, and only runs in
1879 * unrestricted-guest mode. */
1880 assert(hvf_enabled());
1882 cpu->thread = g_malloc0(sizeof(QemuThread));
1883 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1884 qemu_cond_init(cpu->halt_cond);
1886 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
1887 cpu->cpu_index);
1888 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
1889 cpu, QEMU_THREAD_JOINABLE);
1890 while (!cpu->created) {
1891 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1895 static void qemu_dummy_start_vcpu(CPUState *cpu)
1897 char thread_name[VCPU_THREAD_NAME_SIZE];
1899 cpu->thread = g_malloc0(sizeof(QemuThread));
1900 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1901 qemu_cond_init(cpu->halt_cond);
1902 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1903 cpu->cpu_index);
1904 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1905 QEMU_THREAD_JOINABLE);
1906 while (!cpu->created) {
1907 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1911 void qemu_init_vcpu(CPUState *cpu)
1913 cpu->nr_cores = smp_cores;
1914 cpu->nr_threads = smp_threads;
1915 cpu->stopped = true;
1917 if (!cpu->as) {
1918 /* If the target cpu hasn't set up any address spaces itself,
1919 * give it the default one.
1921 cpu->num_ases = 1;
1922 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
1925 if (kvm_enabled()) {
1926 qemu_kvm_start_vcpu(cpu);
1927 } else if (hax_enabled()) {
1928 qemu_hax_start_vcpu(cpu);
1929 } else if (hvf_enabled()) {
1930 qemu_hvf_start_vcpu(cpu);
1931 } else if (tcg_enabled()) {
1932 qemu_tcg_init_vcpu(cpu);
1933 } else {
1934 qemu_dummy_start_vcpu(cpu);
1938 void cpu_stop_current(void)
1940 if (current_cpu) {
1941 qemu_cpu_stop(current_cpu, true);
1945 int vm_stop(RunState state)
1947 if (qemu_in_vcpu_thread()) {
1948 qemu_system_vmstop_request_prepare();
1949 qemu_system_vmstop_request(state);
1951 * FIXME: should not return to device code in case
1952 * vm_stop() has been requested.
1954 cpu_stop_current();
1955 return 0;
1958 return do_vm_stop(state);
1962 * Prepare for (re)starting the VM.
1963 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1964 * running or in case of an error condition), 0 otherwise.
1966 int vm_prepare_start(void)
1968 RunState requested;
1969 int res = 0;
1971 qemu_vmstop_requested(&requested);
1972 if (runstate_is_running() && requested == RUN_STATE__MAX) {
1973 return -1;
1976 /* Ensure that a STOP/RESUME pair of events is emitted if a
1977 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1978 * example, according to documentation is always followed by
1979 * the STOP event.
1981 if (runstate_is_running()) {
1982 qapi_event_send_stop(&error_abort);
1983 res = -1;
1984 } else {
1985 replay_enable_events();
1986 cpu_enable_ticks();
1987 runstate_set(RUN_STATE_RUNNING);
1988 vm_state_notify(1, RUN_STATE_RUNNING);
1991 /* We are sending this now, but the CPUs will be resumed shortly later */
1992 qapi_event_send_resume(&error_abort);
1993 return res;
/* Start the VM: resume all vCPUs when vm_prepare_start() returns 0. */
void vm_start(void)
{
    if (vm_prepare_start() == 0) {
        resume_all_vcpus();
    }
}
2003 /* does a state transition even if the VM is already stopped,
2004 current state is forgotten forever */
2005 int vm_stop_force_state(RunState state)
2007 if (runstate_is_running()) {
2008 return vm_stop(state);
2009 } else {
2010 runstate_set(state);
2012 bdrv_drain_all();
2013 /* Make sure to return an error if the flush in a previous vm_stop()
2014 * failed. */
2015 return bdrv_flush_all();
2019 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
2021 /* XXX: implement xxx_cpu_list for targets that still miss it */
2022 #if defined(cpu_list)
2023 cpu_list(f, cpu_fprintf);
2024 #endif
2027 CpuInfoList *qmp_query_cpus(Error **errp)
2029 MachineState *ms = MACHINE(qdev_get_machine());
2030 MachineClass *mc = MACHINE_GET_CLASS(ms);
2031 CpuInfoList *head = NULL, *cur_item = NULL;
2032 CPUState *cpu;
2034 CPU_FOREACH(cpu) {
2035 CpuInfoList *info;
2036 #if defined(TARGET_I386)
2037 X86CPU *x86_cpu = X86_CPU(cpu);
2038 CPUX86State *env = &x86_cpu->env;
2039 #elif defined(TARGET_PPC)
2040 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
2041 CPUPPCState *env = &ppc_cpu->env;
2042 #elif defined(TARGET_SPARC)
2043 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
2044 CPUSPARCState *env = &sparc_cpu->env;
2045 #elif defined(TARGET_MIPS)
2046 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
2047 CPUMIPSState *env = &mips_cpu->env;
2048 #elif defined(TARGET_TRICORE)
2049 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
2050 CPUTriCoreState *env = &tricore_cpu->env;
2051 #endif
2053 cpu_synchronize_state(cpu);
2055 info = g_malloc0(sizeof(*info));
2056 info->value = g_malloc0(sizeof(*info->value));
2057 info->value->CPU = cpu->cpu_index;
2058 info->value->current = (cpu == first_cpu);
2059 info->value->halted = cpu->halted;
2060 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2061 info->value->thread_id = cpu->thread_id;
2062 #if defined(TARGET_I386)
2063 info->value->arch = CPU_INFO_ARCH_X86;
2064 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
2065 #elif defined(TARGET_PPC)
2066 info->value->arch = CPU_INFO_ARCH_PPC;
2067 info->value->u.ppc.nip = env->nip;
2068 #elif defined(TARGET_SPARC)
2069 info->value->arch = CPU_INFO_ARCH_SPARC;
2070 info->value->u.q_sparc.pc = env->pc;
2071 info->value->u.q_sparc.npc = env->npc;
2072 #elif defined(TARGET_MIPS)
2073 info->value->arch = CPU_INFO_ARCH_MIPS;
2074 info->value->u.q_mips.PC = env->active_tc.PC;
2075 #elif defined(TARGET_TRICORE)
2076 info->value->arch = CPU_INFO_ARCH_TRICORE;
2077 info->value->u.tricore.PC = env->PC;
2078 #else
2079 info->value->arch = CPU_INFO_ARCH_OTHER;
2080 #endif
2081 info->value->has_props = !!mc->cpu_index_to_instance_props;
2082 if (info->value->has_props) {
2083 CpuInstanceProperties *props;
2084 props = g_malloc0(sizeof(*props));
2085 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2086 info->value->props = props;
2089 /* XXX: waiting for the qapi to support GSList */
2090 if (!cur_item) {
2091 head = cur_item = info;
2092 } else {
2093 cur_item->next = info;
2094 cur_item = info;
2098 return head;
2101 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2102 bool has_cpu, int64_t cpu_index, Error **errp)
2104 FILE *f;
2105 uint32_t l;
2106 CPUState *cpu;
2107 uint8_t buf[1024];
2108 int64_t orig_addr = addr, orig_size = size;
2110 if (!has_cpu) {
2111 cpu_index = 0;
2114 cpu = qemu_get_cpu(cpu_index);
2115 if (cpu == NULL) {
2116 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2117 "a CPU number");
2118 return;
2121 f = fopen(filename, "wb");
2122 if (!f) {
2123 error_setg_file_open(errp, errno, filename);
2124 return;
2127 while (size != 0) {
2128 l = sizeof(buf);
2129 if (l > size)
2130 l = size;
2131 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2132 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2133 " specified", orig_addr, orig_size);
2134 goto exit;
2136 if (fwrite(buf, 1, l, f) != l) {
2137 error_setg(errp, QERR_IO_ERROR);
2138 goto exit;
2140 addr += l;
2141 size -= l;
2144 exit:
2145 fclose(f);
2148 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2149 Error **errp)
2151 FILE *f;
2152 uint32_t l;
2153 uint8_t buf[1024];
2155 f = fopen(filename, "wb");
2156 if (!f) {
2157 error_setg_file_open(errp, errno, filename);
2158 return;
2161 while (size != 0) {
2162 l = sizeof(buf);
2163 if (l > size)
2164 l = size;
2165 cpu_physical_memory_read(addr, buf, l);
2166 if (fwrite(buf, 1, l, f) != l) {
2167 error_setg(errp, QERR_IO_ERROR);
2168 goto exit;
2170 addr += l;
2171 size -= l;
2174 exit:
2175 fclose(f);
2178 void qmp_inject_nmi(Error **errp)
2180 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2183 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2185 if (!use_icount) {
2186 return;
2189 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
2190 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2191 if (icount_align_option) {
2192 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
2193 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
2194 } else {
2195 cpu_fprintf(f, "Max guest delay NA\n");
2196 cpu_fprintf(f, "Max guest advance NA\n");