Merge remote-tracking branch 'qemu/master'
[qemu/ar7.git] / cpus.c
blob: 861e8b71a88a63a8a5a0bb16ada362923540753d
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
29 #include "cpu.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "qmp-commands.h"
41 #include "exec/exec-all.h"
43 #include "qemu/thread.h"
44 #include "sysemu/cpus.h"
45 #include "sysemu/qtest.h"
46 #include "qemu/main-loop.h"
47 #include "qemu/bitmap.h"
48 #include "qemu/seqlock.h"
49 #include "tcg.h"
50 #include "qapi-event.h"
51 #include "hw/nmi.h"
52 #include "sysemu/replay.h"
54 #ifdef CONFIG_LINUX
56 #include <sys/prctl.h>
58 #ifndef PR_MCE_KILL
59 #define PR_MCE_KILL 33
60 #endif
62 #ifndef PR_MCE_KILL_SET
63 #define PR_MCE_KILL_SET 1
64 #endif
66 #ifndef PR_MCE_KILL_EARLY
67 #define PR_MCE_KILL_EARLY 1
68 #endif
70 #endif /* CONFIG_LINUX */
72 int64_t max_delay;
73 int64_t max_advance;
75 /* vcpu throttling controls */
76 static QEMUTimer *throttle_timer;
77 static unsigned int throttle_percentage;
79 #define CPU_THROTTLE_PCT_MIN 1
80 #define CPU_THROTTLE_PCT_MAX 99
81 #define CPU_THROTTLE_TIMESLICE_NS 10000000
83 bool cpu_is_stopped(CPUState *cpu)
85 return cpu->stopped || !runstate_is_running();
88 static bool cpu_thread_is_idle(CPUState *cpu)
90 if (cpu->stop || cpu->queued_work_first) {
91 return false;
93 if (cpu_is_stopped(cpu)) {
94 return true;
96 if (!cpu->halted || cpu_has_work(cpu) ||
97 kvm_halt_in_kernel()) {
98 return false;
100 return true;
103 static bool all_cpu_threads_idle(void)
105 CPUState *cpu;
107 CPU_FOREACH(cpu) {
108 if (!cpu_thread_is_idle(cpu)) {
109 return false;
112 return true;
115 /***********************************************************/
116 /* guest cycle counter */
118 /* Protected by TimersState seqlock */
120 static bool icount_sleep = true;
121 static int64_t vm_clock_warp_start = -1;
122 /* Conversion factor from emulated instructions to virtual clock ticks. */
123 static int icount_time_shift;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
125 #define MAX_ICOUNT_SHIFT 10
127 static QEMUTimer *icount_rt_timer;
128 static QEMUTimer *icount_vm_timer;
129 static QEMUTimer *icount_warp_timer;
131 typedef struct TimersState {
132 /* Protected by BQL. */
133 int64_t cpu_ticks_prev;
134 int64_t cpu_ticks_offset;
136 /* cpu_clock_offset can be read outside the BQL, so protect it with
137 * this lock.
139 QemuSeqLock vm_clock_seqlock;
140 int64_t cpu_clock_offset;
141 int32_t cpu_ticks_enabled;
142 int64_t dummy;
144 /* Compensate for varying guest execution speed. */
145 int64_t qemu_icount_bias;
146 /* Only written by TCG thread */
147 int64_t qemu_icount;
148 } TimersState;
150 static TimersState timers_state;
151 bool mttcg_enabled;
154 * We default to false if we know other options have been enabled
155 * which are currently incompatible with MTTCG. Otherwise when each
156 * guest (target) has been updated to support:
157 * - atomic instructions
158 * - memory ordering primitives (barriers)
159 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
161 * Once a guest architecture has been converted to the new primitives
162 * there are two remaining limitations to check.
164 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
165 * - The host must have a stronger memory order than the guest
167 * It may be possible in future to support strong guests on weak hosts
168 * but that will require tagging all load/stores in a guest with their
169 * implicit memory order requirements which would likely slow things
170 * down a lot.
173 static bool check_tcg_memory_orders_compatible(void)
175 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
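    /* The guest is compatible if every memory-ordering guarantee it
     * assumes (bits set in TCG_GUEST_DEFAULT_MO) is also provided by
     * the host TCG backend (TCG_TARGET_DEFAULT_MO). */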
176 return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
177 #else
178 return false;
179 #endif
182 static bool default_mttcg_enabled(void)
184 QemuOpts *icount_opts = qemu_find_opts_singleton("icount");
185 const char *rr = qemu_opt_get(icount_opts, "rr");
187 if (rr || TCG_OVERSIZED_GUEST) {
188 return false;
189 } else {
190 #ifdef TARGET_SUPPORTS_MTTCG
191 return check_tcg_memory_orders_compatible();
192 #else
193 return false;
194 #endif
198 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
200 const char *t = qemu_opt_get(opts, "thread");
201 if (t) {
202 if (strcmp(t, "multi") == 0) {
203 if (TCG_OVERSIZED_GUEST) {
204 error_setg(errp, "No MTTCG when guest word size > host's");
205 } else {
206 if (!check_tcg_memory_orders_compatible()) {
207 error_report("Guest expects a stronger memory ordering "
208 "than the host provides");
209 error_printf("This may cause strange/hard to debug errors\n");
211 mttcg_enabled = true;
213 } else if (strcmp(t, "single") == 0) {
214 mttcg_enabled = false;
215 } else {
216 error_setg(errp, "Invalid 'thread' setting %s", t);
218 } else {
219 mttcg_enabled = default_mttcg_enabled();
223 int64_t cpu_get_icount_raw(void)
225 int64_t icount;
226 CPUState *cpu = current_cpu;
228 icount = timers_state.qemu_icount;
229 if (cpu) {
230 if (!cpu->can_do_io) {
231 fprintf(stderr, "Bad icount read\n");
232 exit(1);
234 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
236 return icount;
239 /* Return the virtual CPU time, based on the instruction counter. */
240 static int64_t cpu_get_icount_locked(void)
242 int64_t icount = cpu_get_icount_raw();
243 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
246 int64_t cpu_get_icount(void)
248 int64_t icount;
249 unsigned start;
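    /* Seqlock read side: retry if a writer updated the timer state
     * while we were reading. */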
251 do {
252 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
253 icount = cpu_get_icount_locked();
254 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
256 return icount;
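/* Each emulated instruction accounts for 2^icount_time_shift ns of
 * virtual time. */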
259 int64_t cpu_icount_to_ns(int64_t icount)
261 return icount << icount_time_shift;
264 /* return the time elapsed in VM between vm_start and vm_stop. Unless
265 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
266 * counter.
268 * Caller must hold the BQL
270 int64_t cpu_get_ticks(void)
272 int64_t ticks;
274 if (use_icount) {
275 return cpu_get_icount();
278 ticks = timers_state.cpu_ticks_offset;
279 if (timers_state.cpu_ticks_enabled) {
280 ticks += cpu_get_host_ticks();
283 if (timers_state.cpu_ticks_prev > ticks) {
284 /* Note: non-increasing ticks may happen if the host uses
285 software suspend */
286 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
287 ticks = timers_state.cpu_ticks_prev;
290 timers_state.cpu_ticks_prev = ticks;
291 return ticks;
294 static int64_t cpu_get_clock_locked(void)
296 int64_t time;
298 time = timers_state.cpu_clock_offset;
299 if (timers_state.cpu_ticks_enabled) {
300 time += get_clock();
303 return time;
306 /* Return the monotonic time elapsed in VM, i.e.,
307 * the time between vm_start and vm_stop
309 int64_t cpu_get_clock(void)
311 int64_t ti;
312 unsigned start;
314 do {
315 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
316 ti = cpu_get_clock_locked();
317 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
319 return ti;
322 /* enable cpu_get_ticks()
323 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
325 void cpu_enable_ticks(void)
327 /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
328 seqlock_write_begin(&timers_state.vm_clock_seqlock);
329 if (!timers_state.cpu_ticks_enabled) {
330 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
331 timers_state.cpu_clock_offset -= get_clock();
332 timers_state.cpu_ticks_enabled = 1;
334 seqlock_write_end(&timers_state.vm_clock_seqlock);
337 /* disable cpu_get_ticks() : the clock is stopped. You must not call
338 * cpu_get_ticks() after that.
339 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
341 void cpu_disable_ticks(void)
343 /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
344 seqlock_write_begin(&timers_state.vm_clock_seqlock);
345 if (timers_state.cpu_ticks_enabled) {
346 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
347 timers_state.cpu_clock_offset = cpu_get_clock_locked();
348 timers_state.cpu_ticks_enabled = 0;
350 seqlock_write_end(&timers_state.vm_clock_seqlock);
353 /* Correlation between real and virtual time is always going to be
354 fairly approximate, so ignore small variation.
355 When the guest is idle real and virtual time will be aligned in
356 the IO wait loop. */
357 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
359 static void icount_adjust(void)
361 int64_t cur_time;
362 int64_t cur_icount;
363 int64_t delta;
365 /* Protected by TimersState mutex. */
366 static int64_t last_delta;
368 /* If the VM is not running, then do nothing. */
369 if (!runstate_is_running()) {
370 return;
373 seqlock_write_begin(&timers_state.vm_clock_seqlock);
374 cur_time = cpu_get_clock_locked();
375 cur_icount = cpu_get_icount_locked();
377 delta = cur_icount - cur_time;
378 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
379 if (delta > 0
380 && last_delta + ICOUNT_WOBBLE < delta * 2
381 && icount_time_shift > 0) {
382 /* The guest is getting too far ahead. Slow time down. */
383 icount_time_shift--;
385 if (delta < 0
386 && last_delta - ICOUNT_WOBBLE > delta * 2
387 && icount_time_shift < MAX_ICOUNT_SHIFT) {
388 /* The guest is getting too far behind. Speed time up. */
389 icount_time_shift++;
391 last_delta = delta;
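    /* Recompute the bias so that cpu_get_icount_locked() still returns
     * cur_icount with the (possibly changed) shift, i.e. the virtual
     * clock does not jump when the rate is adjusted. */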
392 timers_state.qemu_icount_bias = cur_icount
393 - (timers_state.qemu_icount << icount_time_shift);
394 seqlock_write_end(&timers_state.vm_clock_seqlock);
397 static void icount_adjust_rt(void *opaque)
399 timer_mod(icount_rt_timer,
400 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
401 icount_adjust();
404 static void icount_adjust_vm(void *opaque)
406 timer_mod(icount_vm_timer,
407 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
408 NANOSECONDS_PER_SECOND / 10);
409 icount_adjust();
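/* Convert a nanosecond deadline into a number of emulated instructions,
 * rounding up (one instruction == 2^icount_time_shift ns). */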
412 static int64_t qemu_icount_round(int64_t count)
414 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
417 static void icount_warp_rt(void)
419 unsigned seq;
420 int64_t warp_start;
422 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
423 * changes from -1 to another value, so the race here is okay.
425 do {
426 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
427 warp_start = vm_clock_warp_start;
428 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
430 if (warp_start == -1) {
431 return;
434 seqlock_write_begin(&timers_state.vm_clock_seqlock);
435 if (runstate_is_running()) {
436 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
437 cpu_get_clock_locked());
438 int64_t warp_delta;
440 warp_delta = clock - vm_clock_warp_start;
441 if (use_icount == 2) {
443 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
444 * far ahead of real time.
446 int64_t cur_icount = cpu_get_icount_locked();
447 int64_t delta = clock - cur_icount;
448 warp_delta = MIN(warp_delta, delta);
450 timers_state.qemu_icount_bias += warp_delta;
452 vm_clock_warp_start = -1;
453 seqlock_write_end(&timers_state.vm_clock_seqlock);
455 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
456 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
460 static void icount_timer_cb(void *opaque)
462 /* No need for a checkpoint because the timer already synchronizes
463 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
465 icount_warp_rt();
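/* Advance QEMU_CLOCK_VIRTUAL to 'dest' by bumping the icount bias one
 * timer deadline at a time, running the timers that become due. */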
468 void qtest_clock_warp(int64_t dest)
470 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
471 AioContext *aio_context;
472 assert(qtest_enabled());
473 aio_context = qemu_get_aio_context();
474 while (clock < dest) {
475 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
476 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
478 seqlock_write_begin(&timers_state.vm_clock_seqlock);
479 timers_state.qemu_icount_bias += warp;
480 seqlock_write_end(&timers_state.vm_clock_seqlock);
482 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
483 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
484 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
486 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
489 void qemu_start_warp_timer(void)
491 int64_t clock;
492 int64_t deadline;
494 if (!use_icount) {
495 return;
498 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
499 * do not fire, so computing the deadline does not make sense.
501 if (!runstate_is_running()) {
502 return;
505 /* warp clock deterministically in record/replay mode */
506 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
507 return;
510 if (!all_cpu_threads_idle()) {
511 return;
514 if (qtest_enabled()) {
515 /* When testing, qtest commands advance icount. */
516 return;
519 /* We want to use the earliest deadline from ALL vm_clocks */
520 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
521 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
522 if (deadline < 0) {
523 static bool notified;
524 if (!icount_sleep && !notified) {
525 error_report("WARNING: icount sleep disabled and no active timers");
526 notified = true;
528 return;
531 if (deadline > 0) {
533 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
534 * sleep. Otherwise, the CPU might be waiting for a future timer
535 * interrupt to wake it up, but the interrupt never comes because
536 * the vCPU isn't running any insns and thus doesn't advance the
537 * QEMU_CLOCK_VIRTUAL.
539 if (!icount_sleep) {
541 * We never let VCPUs sleep in no sleep icount mode.
542 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
543 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
544 * It is useful when we want a deterministic execution time,
545 * isolated from host latencies.
547 seqlock_write_begin(&timers_state.vm_clock_seqlock);
548 timers_state.qemu_icount_bias += deadline;
549 seqlock_write_end(&timers_state.vm_clock_seqlock);
550 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
551 } else {
553 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
554 * "real" time, (related to the time left until the next event) has
555 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
556 * This keeps the warps from being visible externally; for example,
557 * you will not be sending network packets in a burst instead of
558 * every 100ms.
560 seqlock_write_begin(&timers_state.vm_clock_seqlock);
561 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
562 vm_clock_warp_start = clock;
564 seqlock_write_end(&timers_state.vm_clock_seqlock);
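        /* timer_mod_anticipate() only moves the expiry earlier, so a
         * concurrent caller cannot push the warp deadline further out. */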
565 timer_mod_anticipate(icount_warp_timer, clock + deadline);
567 } else if (deadline == 0) {
568 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
572 static void qemu_account_warp_timer(void)
574 if (!use_icount || !icount_sleep) {
575 return;
578 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
579 * do not fire, so computing the deadline does not make sense.
581 if (!runstate_is_running()) {
582 return;
585 /* warp clock deterministically in record/replay mode */
586 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
587 return;
590 timer_del(icount_warp_timer);
591 icount_warp_rt();
594 static bool icount_state_needed(void *opaque)
596 return use_icount;
600 * This is a subsection for icount migration.
602 static const VMStateDescription icount_vmstate_timers = {
603 .name = "timer/icount",
604 .version_id = 1,
605 .minimum_version_id = 1,
606 .needed = icount_state_needed,
607 .fields = (VMStateField[]) {
608 VMSTATE_INT64(qemu_icount_bias, TimersState),
609 VMSTATE_INT64(qemu_icount, TimersState),
610 VMSTATE_END_OF_LIST()
614 static const VMStateDescription vmstate_timers = {
615 .name = "timer",
616 .version_id = 2,
617 .minimum_version_id = 1,
618 .fields = (VMStateField[]) {
619 VMSTATE_INT64(cpu_ticks_offset, TimersState),
620 VMSTATE_INT64(dummy, TimersState),
621 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
622 VMSTATE_END_OF_LIST()
624 .subsections = (const VMStateDescription*[]) {
625 &icount_vmstate_timers,
626 NULL
630 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
632 double pct;
633 double throttle_ratio;
634 long sleeptime_ns;
636 if (!cpu_throttle_get_percentage()) {
637 return;
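    /* pct is the requested fraction of time to spend sleeping, so
     * pct / (1 - pct) is the sleep:run ratio.  E.g. at 75% throttle the
     * vCPU sleeps 30ms for every 10ms (CPU_THROTTLE_TIMESLICE_NS) it runs. */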
640 pct = (double)cpu_throttle_get_percentage()/100;
641 throttle_ratio = pct / (1 - pct);
642 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
644 qemu_mutex_unlock_iothread();
645 atomic_set(&cpu->throttle_thread_scheduled, 0);
646 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
647 qemu_mutex_lock_iothread();
650 static void cpu_throttle_timer_tick(void *opaque)
652 CPUState *cpu;
653 double pct;
655 /* Stop the timer if needed */
656 if (!cpu_throttle_get_percentage()) {
657 return;
659 CPU_FOREACH(cpu) {
660 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
661 async_run_on_cpu(cpu, cpu_throttle_thread,
662 RUN_ON_CPU_NULL);
666 pct = (double)cpu_throttle_get_percentage()/100;
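    /* Stretch the period to TIMESLICE / (1 - pct) so that each period
     * contains one full run timeslice plus the matching sleep, keeping
     * the overall sleep fraction at pct. */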
667 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
668 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
671 void cpu_throttle_set(int new_throttle_pct)
673 /* Ensure throttle percentage is within valid range */
674 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
675 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
677 atomic_set(&throttle_percentage, new_throttle_pct);
679 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
680 CPU_THROTTLE_TIMESLICE_NS);
683 void cpu_throttle_stop(void)
685 atomic_set(&throttle_percentage, 0);
688 bool cpu_throttle_active(void)
690 return (cpu_throttle_get_percentage() != 0);
693 int cpu_throttle_get_percentage(void)
695 return atomic_read(&throttle_percentage);
698 void cpu_ticks_init(void)
700 seqlock_init(&timers_state.vm_clock_seqlock);
701 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
702 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
703 cpu_throttle_timer_tick, NULL);
706 void configure_icount(QemuOpts *opts, Error **errp)
708 const char *option;
709 char *rem_str = NULL;
711 option = qemu_opt_get(opts, "shift");
712 if (!option) {
713 if (qemu_opt_get(opts, "align") != NULL) {
714 error_setg(errp, "Please specify shift option when using align");
716 return;
719 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
720 if (icount_sleep) {
721 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
722 icount_timer_cb, NULL);
725 icount_align_option = qemu_opt_get_bool(opts, "align", false);
727 if (icount_align_option && !icount_sleep) {
728 error_setg(errp, "align=on and sleep=off are incompatible");
730 if (strcmp(option, "auto") != 0) {
731 errno = 0;
732 icount_time_shift = strtol(option, &rem_str, 0);
733 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
734 error_setg(errp, "icount: Invalid shift value");
736 use_icount = 1;
737 return;
738 } else if (icount_align_option) {
739 error_setg(errp, "shift=auto and align=on are incompatible");
740 } else if (!icount_sleep) {
741 error_setg(errp, "shift=auto and sleep=off are incompatible");
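    /* shift=auto: use adaptive icount (use_icount == 2); the timers set
     * up below retune icount_time_shift while the guest runs. */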
744 use_icount = 2;
746 /* 125MIPS seems a reasonable initial guess at the guest speed.
747 It will be corrected fairly quickly anyway. */
748 icount_time_shift = 3;
750 /* Have both realtime and virtual time triggers for speed adjustment.
751 The realtime trigger catches emulated time passing too slowly,
752 the virtual time trigger catches emulated time passing too fast.
753 Realtime triggers occur even when idle, so use them less frequently
754 than VM triggers. */
755 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
756 icount_adjust_rt, NULL);
757 timer_mod(icount_rt_timer,
758 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
759 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
760 icount_adjust_vm, NULL);
761 timer_mod(icount_vm_timer,
762 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
763 NANOSECONDS_PER_SECOND / 10);
766 /***********************************************************/
767 /* TCG vCPU kick timer
769 * The kick timer is responsible for moving single-threaded vCPU
770 * emulation on to the next vCPU. If more than one vCPU is running, a
771 * timer event will force a cpu->exit so the next vCPU can get
772 * scheduled.
774 * The timer is removed while all vCPUs are idle and restarted again
775 * once any of them becomes runnable.
778 static QEMUTimer *tcg_kick_vcpu_timer;
779 static CPUState *tcg_current_rr_cpu;
781 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
783 static inline int64_t qemu_tcg_next_kick(void)
785 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
788 /* Kick the currently round-robin scheduled vCPU */
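/* Re-read tcg_current_rr_cpu after the kick and retry if the scheduler
 * has already moved on to another vCPU, so the kick is never lost. */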
789 static void qemu_cpu_kick_rr_cpu(void)
791 CPUState *cpu;
792 do {
793 cpu = atomic_mb_read(&tcg_current_rr_cpu);
794 if (cpu) {
795 cpu_exit(cpu);
797 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
800 static void kick_tcg_thread(void *opaque)
802 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
803 qemu_cpu_kick_rr_cpu();
806 static void start_tcg_kick_timer(void)
808 if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
809 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
810 kick_tcg_thread, NULL);
811 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
815 static void stop_tcg_kick_timer(void)
817 if (tcg_kick_vcpu_timer) {
818 timer_del(tcg_kick_vcpu_timer);
819 tcg_kick_vcpu_timer = NULL;
823 /***********************************************************/
824 void hw_error(const char *fmt, ...)
826 va_list ap;
827 CPUState *cpu;
829 va_start(ap, fmt);
830 fprintf(stderr, "qemu: hardware error: ");
831 vfprintf(stderr, fmt, ap);
832 fprintf(stderr, "\n");
833 CPU_FOREACH(cpu) {
834 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
835 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
837 va_end(ap);
838 abort();
841 void cpu_synchronize_all_states(void)
843 CPUState *cpu;
845 CPU_FOREACH(cpu) {
846 cpu_synchronize_state(cpu);
850 void cpu_synchronize_all_post_reset(void)
852 CPUState *cpu;
854 CPU_FOREACH(cpu) {
855 cpu_synchronize_post_reset(cpu);
859 void cpu_synchronize_all_post_init(void)
861 CPUState *cpu;
863 CPU_FOREACH(cpu) {
864 cpu_synchronize_post_init(cpu);
868 static int do_vm_stop(RunState state)
870 int ret = 0;
872 if (runstate_is_running()) {
873 cpu_disable_ticks();
874 pause_all_vcpus();
875 runstate_set(state);
876 vm_state_notify(0, state);
877 qapi_event_send_stop(&error_abort);
880 bdrv_drain_all();
881 replay_disable_events();
882 ret = bdrv_flush_all();
884 return ret;
887 static bool cpu_can_run(CPUState *cpu)
889 if (cpu->stop) {
890 return false;
892 if (cpu_is_stopped(cpu)) {
893 return false;
895 return true;
898 static void cpu_handle_guest_debug(CPUState *cpu)
900 gdb_set_stop_cpu(cpu);
901 qemu_system_debug_request();
902 cpu->stopped = true;
905 #ifdef CONFIG_LINUX
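/* Restore the default SIGBUS disposition, unblock the signal and raise
 * it again so the process dies with the original SIGBUS. */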
906 static void sigbus_reraise(void)
908 sigset_t set;
909 struct sigaction action;
911 memset(&action, 0, sizeof(action));
912 action.sa_handler = SIG_DFL;
913 if (!sigaction(SIGBUS, &action, NULL)) {
914 raise(SIGBUS);
915 sigemptyset(&set);
916 sigaddset(&set, SIGBUS);
917 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
919 perror("Failed to re-raise SIGBUS!\n");
920 abort();
923 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
925 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
926 sigbus_reraise();
929 if (current_cpu) {
930 /* Called asynchronously in VCPU thread. */
931 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
932 sigbus_reraise();
934 } else {
935 /* Called synchronously (via signalfd) in main thread. */
936 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
937 sigbus_reraise();
942 static void qemu_init_sigbus(void)
944 struct sigaction action;
946 memset(&action, 0, sizeof(action));
947 action.sa_flags = SA_SIGINFO;
948 action.sa_sigaction = sigbus_handler;
949 sigaction(SIGBUS, &action, NULL);
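    /* Ask the kernel to deliver a SIGBUS for machine-check memory errors
     * as soon as they are detected (early kill), so it can be forwarded
     * to the guest; see prctl(2) PR_MCE_KILL. */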
951 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
953 #else /* !CONFIG_LINUX */
954 static void qemu_init_sigbus(void)
957 #endif /* !CONFIG_LINUX */
959 static QemuMutex qemu_global_mutex;
961 static QemuThread io_thread;
963 /* cpu creation */
964 static QemuCond qemu_cpu_cond;
965 /* system init */
966 static QemuCond qemu_pause_cond;
968 void qemu_init_cpu_loop(void)
970 qemu_init_sigbus();
971 qemu_cond_init(&qemu_cpu_cond);
972 qemu_cond_init(&qemu_pause_cond);
973 qemu_mutex_init(&qemu_global_mutex);
975 qemu_thread_get_self(&io_thread);
978 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
980 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
983 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
985 if (kvm_destroy_vcpu(cpu) < 0) {
986 error_report("kvm_destroy_vcpu failed");
987 exit(EXIT_FAILURE);
991 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
995 static void qemu_wait_io_event_common(CPUState *cpu)
997 atomic_mb_set(&cpu->thread_kicked, false);
998 if (cpu->stop) {
999 cpu->stop = false;
1000 cpu->stopped = true;
1001 qemu_cond_broadcast(&qemu_pause_cond);
1003 process_queued_cpu_work(cpu);
1006 static bool qemu_tcg_should_sleep(CPUState *cpu)
1008 if (mttcg_enabled) {
1009 return cpu_thread_is_idle(cpu);
1010 } else {
1011 return all_cpu_threads_idle();
1015 static void qemu_tcg_wait_io_event(CPUState *cpu)
1017 while (qemu_tcg_should_sleep(cpu)) {
1018 stop_tcg_kick_timer();
1019 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1022 start_tcg_kick_timer();
1024 qemu_wait_io_event_common(cpu);
1027 static void qemu_kvm_wait_io_event(CPUState *cpu)
1029 while (cpu_thread_is_idle(cpu)) {
1030 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1033 qemu_wait_io_event_common(cpu);
1036 static void *qemu_kvm_cpu_thread_fn(void *arg)
1038 CPUState *cpu = arg;
1039 int r;
1041 rcu_register_thread();
1043 qemu_mutex_lock_iothread();
1044 qemu_thread_get_self(cpu->thread);
1045 cpu->thread_id = qemu_get_thread_id();
1046 cpu->can_do_io = 1;
1047 current_cpu = cpu;
1049 r = kvm_init_vcpu(cpu);
1050 if (r < 0) {
1051 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1052 exit(1);
1055 kvm_init_cpu_signals(cpu);
1057 /* signal CPU creation */
1058 cpu->created = true;
1059 qemu_cond_signal(&qemu_cpu_cond);
1061 do {
1062 if (cpu_can_run(cpu)) {
1063 r = kvm_cpu_exec(cpu);
1064 if (r == EXCP_DEBUG) {
1065 cpu_handle_guest_debug(cpu);
1068 qemu_kvm_wait_io_event(cpu);
1069 } while (!cpu->unplug || cpu_can_run(cpu));
1071 qemu_kvm_destroy_vcpu(cpu);
1072 cpu->created = false;
1073 qemu_cond_signal(&qemu_cpu_cond);
1074 qemu_mutex_unlock_iothread();
1075 return NULL;
1078 static void *qemu_dummy_cpu_thread_fn(void *arg)
1080 #ifdef _WIN32
1081 fprintf(stderr, "qtest is not supported under Windows\n");
1082 exit(1);
1083 #else
1084 CPUState *cpu = arg;
1085 sigset_t waitset;
1086 int r;
1088 rcu_register_thread();
1090 qemu_mutex_lock_iothread();
1091 qemu_thread_get_self(cpu->thread);
1092 cpu->thread_id = qemu_get_thread_id();
1093 cpu->can_do_io = 1;
1094 current_cpu = cpu;
1096 sigemptyset(&waitset);
1097 sigaddset(&waitset, SIG_IPI);
1099 /* signal CPU creation */
1100 cpu->created = true;
1101 qemu_cond_signal(&qemu_cpu_cond);
1103 while (1) {
1104 qemu_mutex_unlock_iothread();
1105 do {
1106 int sig;
1107 r = sigwait(&waitset, &sig);
1108 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1109 if (r == -1) {
1110 perror("sigwait");
1111 exit(1);
1113 qemu_mutex_lock_iothread();
1114 qemu_wait_io_event_common(cpu);
1117 return NULL;
1118 #endif
1121 static int64_t tcg_get_icount_limit(void)
1123 int64_t deadline;
1125 if (replay_mode != REPLAY_MODE_PLAY) {
1126 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1128 /* Maintain prior (possibly buggy) behaviour where if no deadline
1129 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1130 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1131 * nanoseconds.
1133 if ((deadline < 0) || (deadline > INT32_MAX)) {
1134 deadline = INT32_MAX;
1137 return qemu_icount_round(deadline);
1138 } else {
1139 return replay_get_instructions();
1143 static void handle_icount_deadline(void)
1145 if (use_icount) {
1146 int64_t deadline =
1147 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1149 if (deadline == 0) {
1150 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1155 static int tcg_cpu_exec(CPUState *cpu)
1157 int ret;
1158 #ifdef CONFIG_PROFILER
1159 int64_t ti;
1160 #endif
1162 #ifdef CONFIG_PROFILER
1163 ti = profile_getclock();
1164 #endif
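    /* Hand the translation loop an instruction budget: up to 0xffff goes
     * into the 16-bit decrementer (icount_decr.u16.low), the remainder
     * into icount_extra. */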
1165 if (use_icount) {
1166 int64_t count;
1167 int decr;
1168 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1169 + cpu->icount_extra);
1170 cpu->icount_decr.u16.low = 0;
1171 cpu->icount_extra = 0;
1172 count = tcg_get_icount_limit();
1173 timers_state.qemu_icount += count;
1174 decr = (count > 0xffff) ? 0xffff : count;
1175 count -= decr;
1176 cpu->icount_decr.u16.low = decr;
1177 cpu->icount_extra = count;
1179 qemu_mutex_unlock_iothread();
1180 cpu_exec_start(cpu);
1181 ret = cpu_exec(cpu);
1182 cpu_exec_end(cpu);
1183 qemu_mutex_lock_iothread();
1184 #ifdef CONFIG_PROFILER
1185 tcg_time += profile_getclock() - ti;
1186 #endif
1187 if (use_icount) {
1188 /* Fold pending instructions back into the
1189 instruction counter, and clear the interrupt flag. */
1190 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1191 + cpu->icount_extra);
1192 cpu->icount_decr.u32 = 0;
1193 cpu->icount_extra = 0;
1194 replay_account_executed_instructions();
1196 return ret;
1199 /* Destroy any remaining vCPUs which have been unplugged and have
1200 * finished running
1202 static void deal_with_unplugged_cpus(void)
1204 CPUState *cpu;
1206 CPU_FOREACH(cpu) {
1207 if (cpu->unplug && !cpu_can_run(cpu)) {
1208 qemu_tcg_destroy_vcpu(cpu);
1209 cpu->created = false;
1210 qemu_cond_signal(&qemu_cpu_cond);
1211 break;
1216 /* Single-threaded TCG
1218 * In the single-threaded case each vCPU is simulated in turn. If
1219 * there is more than a single vCPU we create a simple timer to kick
1220 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1221 * This is done explicitly rather than relying on side-effects
1222 * elsewhere.
1225 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1227 CPUState *cpu = arg;
1229 rcu_register_thread();
1231 qemu_mutex_lock_iothread();
1232 qemu_thread_get_self(cpu->thread);
1234 CPU_FOREACH(cpu) {
1235 cpu->thread_id = qemu_get_thread_id();
1236 cpu->created = true;
1237 cpu->can_do_io = 1;
1239 qemu_cond_signal(&qemu_cpu_cond);
1241 /* wait for initial kick-off after machine start */
1242 while (first_cpu->stopped) {
1243 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1245 /* process any pending work */
1246 CPU_FOREACH(cpu) {
1247 current_cpu = cpu;
1248 qemu_wait_io_event_common(cpu);
1252 start_tcg_kick_timer();
1254 cpu = first_cpu;
1256 /* process any pending work */
1257 cpu->exit_request = 1;
1259 while (1) {
1260 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1261 qemu_account_warp_timer();
1263 if (!cpu) {
1264 cpu = first_cpu;
1267 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1269 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1270 current_cpu = cpu;
1272 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1273 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1275 if (cpu_can_run(cpu)) {
1276 int r;
1277 r = tcg_cpu_exec(cpu);
1278 if (r == EXCP_DEBUG) {
1279 cpu_handle_guest_debug(cpu);
1280 break;
1281 } else if (r == EXCP_ATOMIC) {
1282 qemu_mutex_unlock_iothread();
1283 cpu_exec_step_atomic(cpu);
1284 qemu_mutex_lock_iothread();
1285 break;
1287 } else if (cpu->stop) {
1288 if (cpu->unplug) {
1289 cpu = CPU_NEXT(cpu);
1291 break;
1294 cpu = CPU_NEXT(cpu);
1295 } /* while (cpu && !cpu->exit_request).. */
1297 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1298 atomic_set(&tcg_current_rr_cpu, NULL);
1300 if (cpu && cpu->exit_request) {
1301 atomic_mb_set(&cpu->exit_request, 0);
1304 handle_icount_deadline();
1306 qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
1307 deal_with_unplugged_cpus();
1310 return NULL;
1313 static void *qemu_hax_cpu_thread_fn(void *arg)
1315 CPUState *cpu = arg;
1316 int r;
1317 qemu_thread_get_self(cpu->thread);
1318 qemu_mutex_lock(&qemu_global_mutex);
1320 cpu->thread_id = qemu_get_thread_id();
1321 cpu->created = true;
1322 cpu->halted = 0;
1323 current_cpu = cpu;
1325 hax_init_vcpu(cpu);
1326 qemu_cond_signal(&qemu_cpu_cond);
1328 while (1) {
1329 if (cpu_can_run(cpu)) {
1330 r = hax_smp_cpu_exec(cpu);
1331 if (r == EXCP_DEBUG) {
1332 cpu_handle_guest_debug(cpu);
1336 while (cpu_thread_is_idle(cpu)) {
1337 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1339 #ifdef _WIN32
1340 SleepEx(0, TRUE);
1341 #endif
1342 qemu_wait_io_event_common(cpu);
1344 return NULL;
1347 #ifdef _WIN32
1348 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1351 #endif
1353 /* Multi-threaded TCG
1355 * In the multi-threaded case each vCPU has its own thread. The TLS
1356 * variable current_cpu can be used deep in the code to find the
1357 * current CPUState for a given thread.
1360 static void *qemu_tcg_cpu_thread_fn(void *arg)
1362 CPUState *cpu = arg;
1364 rcu_register_thread();
1366 qemu_mutex_lock_iothread();
1367 qemu_thread_get_self(cpu->thread);
1369 cpu->thread_id = qemu_get_thread_id();
1370 cpu->created = true;
1371 cpu->can_do_io = 1;
1372 current_cpu = cpu;
1373 qemu_cond_signal(&qemu_cpu_cond);
1375 /* process any pending work */
1376 cpu->exit_request = 1;
1378 while (1) {
1379 if (cpu_can_run(cpu)) {
1380 int r;
1381 r = tcg_cpu_exec(cpu);
1382 switch (r) {
1383 case EXCP_DEBUG:
1384 cpu_handle_guest_debug(cpu);
1385 break;
1386 case EXCP_HALTED:
1387 /* during start-up the vCPU is reset and the thread is
1388 * kicked several times. If we don't ensure we go back
1389 * to sleep in the halted state we won't cleanly
1390 * start-up when the vCPU is enabled.
1392 * cpu->halted should ensure we sleep in wait_io_event
1394 g_assert(cpu->halted);
1395 break;
1396 case EXCP_ATOMIC:
1397 qemu_mutex_unlock_iothread();
1398 cpu_exec_step_atomic(cpu);
1399 qemu_mutex_lock_iothread();
1400 default:
1401 /* Ignore everything else? */
1402 break;
1406 handle_icount_deadline();
1408 atomic_mb_set(&cpu->exit_request, 0);
1409 qemu_tcg_wait_io_event(cpu);
1412 return NULL;
1415 static void qemu_cpu_kick_thread(CPUState *cpu)
1417 #ifndef _WIN32
1418 int err;
1420 if (cpu->thread_kicked) {
1421 return;
1423 cpu->thread_kicked = true;
1424 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1425 if (err) {
1426 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1427 exit(1);
1429 #else /* _WIN32 */
1430 if (!qemu_cpu_is_self(cpu)) {
1431 if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1432 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1433 __func__, GetLastError());
1434 exit(1);
1437 #endif
1440 void qemu_cpu_kick(CPUState *cpu)
1442 qemu_cond_broadcast(cpu->halt_cond);
1443 if (tcg_enabled()) {
1444 cpu_exit(cpu);
1445 /* NOP unless doing single-thread RR */
1446 qemu_cpu_kick_rr_cpu();
1447 } else {
1448 if (hax_enabled()) {
1450 * FIXME: race condition with the exit_request check in
1451 * hax_vcpu_hax_exec
1453 cpu->exit_request = 1;
1455 qemu_cpu_kick_thread(cpu);
1459 void qemu_cpu_kick_self(void)
1461 assert(current_cpu);
1462 qemu_cpu_kick_thread(current_cpu);
1465 bool qemu_cpu_is_self(CPUState *cpu)
1467 return qemu_thread_is_self(cpu->thread);
1470 bool qemu_in_vcpu_thread(void)
1472 return current_cpu && qemu_cpu_is_self(current_cpu);
1475 static __thread bool iothread_locked = false;
1477 bool qemu_mutex_iothread_locked(void)
1479 return iothread_locked;
1482 void qemu_mutex_lock_iothread(void)
1484 g_assert(!qemu_mutex_iothread_locked());
1485 qemu_mutex_lock(&qemu_global_mutex);
1486 iothread_locked = true;
1489 void qemu_mutex_unlock_iothread(void)
1491 g_assert(qemu_mutex_iothread_locked());
1492 iothread_locked = false;
1493 qemu_mutex_unlock(&qemu_global_mutex);
1496 static bool all_vcpus_paused(void)
1498 CPUState *cpu;
1500 CPU_FOREACH(cpu) {
1501 if (!cpu->stopped) {
1502 return false;
1506 return true;
1509 void pause_all_vcpus(void)
1511 CPUState *cpu;
1513 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1514 CPU_FOREACH(cpu) {
1515 cpu->stop = true;
1516 qemu_cpu_kick(cpu);
1519 if (qemu_in_vcpu_thread()) {
1520 cpu_stop_current();
1523 while (!all_vcpus_paused()) {
1524 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1525 CPU_FOREACH(cpu) {
1526 qemu_cpu_kick(cpu);
1531 void cpu_resume(CPUState *cpu)
1533 cpu->stop = false;
1534 cpu->stopped = false;
1535 qemu_cpu_kick(cpu);
1538 void resume_all_vcpus(void)
1540 CPUState *cpu;
1542 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1543 CPU_FOREACH(cpu) {
1544 cpu_resume(cpu);
1548 void cpu_remove(CPUState *cpu)
1550 cpu->stop = true;
1551 cpu->unplug = true;
1552 qemu_cpu_kick(cpu);
1555 void cpu_remove_sync(CPUState *cpu)
1557 cpu_remove(cpu);
1558 while (cpu->created) {
1559 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1563 /* For temporary buffers for forming a name */
1564 #define VCPU_THREAD_NAME_SIZE 16
1566 static void qemu_tcg_init_vcpu(CPUState *cpu)
1568 char thread_name[VCPU_THREAD_NAME_SIZE];
1569 static QemuCond *single_tcg_halt_cond;
1570 static QemuThread *single_tcg_cpu_thread;
1572 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1573 cpu->thread = g_malloc0(sizeof(QemuThread));
1574 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1575 qemu_cond_init(cpu->halt_cond);
1577 if (qemu_tcg_mttcg_enabled()) {
1578 /* create a thread per vCPU with TCG (MTTCG) */
1579 parallel_cpus = true;
1580 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1581 cpu->cpu_index);
1583 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1584 cpu, QEMU_THREAD_JOINABLE);
1586 } else {
1587 /* share a single thread for all cpus with TCG */
1588 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1589 qemu_thread_create(cpu->thread, thread_name,
1590 qemu_tcg_rr_cpu_thread_fn,
1591 cpu, QEMU_THREAD_JOINABLE);
1593 single_tcg_halt_cond = cpu->halt_cond;
1594 single_tcg_cpu_thread = cpu->thread;
1596 #ifdef _WIN32
1597 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1598 #endif
1599 while (!cpu->created) {
1600 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1602 } else {
1603 /* For non-MTTCG cases we share the thread */
1604 cpu->thread = single_tcg_cpu_thread;
1605 cpu->halt_cond = single_tcg_halt_cond;
1609 static void qemu_hax_start_vcpu(CPUState *cpu)
1611 char thread_name[VCPU_THREAD_NAME_SIZE];
1613 cpu->thread = g_malloc0(sizeof(QemuThread));
1614 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1615 qemu_cond_init(cpu->halt_cond);
1617 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1618 cpu->cpu_index);
1619 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1620 cpu, QEMU_THREAD_JOINABLE);
1621 #ifdef _WIN32
1622 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1623 #endif
1624 while (!cpu->created) {
1625 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1629 static void qemu_kvm_start_vcpu(CPUState *cpu)
1631 char thread_name[VCPU_THREAD_NAME_SIZE];
1633 cpu->thread = g_malloc0(sizeof(QemuThread));
1634 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1635 qemu_cond_init(cpu->halt_cond);
1636 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1637 cpu->cpu_index);
1638 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1639 cpu, QEMU_THREAD_JOINABLE);
1640 while (!cpu->created) {
1641 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1645 static void qemu_dummy_start_vcpu(CPUState *cpu)
1647 char thread_name[VCPU_THREAD_NAME_SIZE];
1649 cpu->thread = g_malloc0(sizeof(QemuThread));
1650 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1651 qemu_cond_init(cpu->halt_cond);
1652 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1653 cpu->cpu_index);
1654 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1655 QEMU_THREAD_JOINABLE);
1656 while (!cpu->created) {
1657 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1661 void qemu_init_vcpu(CPUState *cpu)
1663 cpu->nr_cores = smp_cores;
1664 cpu->nr_threads = smp_threads;
1665 cpu->stopped = true;
1667 if (!cpu->as) {
1668 /* If the target cpu hasn't set up any address spaces itself,
1669 * give it the default one.
1671 AddressSpace *as = address_space_init_shareable(cpu->memory,
1672 "cpu-memory");
1673 cpu->num_ases = 1;
1674 cpu_address_space_init(cpu, as, 0);
1677 if (kvm_enabled()) {
1678 qemu_kvm_start_vcpu(cpu);
1679 } else if (hax_enabled()) {
1680 qemu_hax_start_vcpu(cpu);
1681 } else if (tcg_enabled()) {
1682 qemu_tcg_init_vcpu(cpu);
1683 } else {
1684 qemu_dummy_start_vcpu(cpu);
1688 void cpu_stop_current(void)
1690 if (current_cpu) {
1691 current_cpu->stop = false;
1692 current_cpu->stopped = true;
1693 cpu_exit(current_cpu);
1694 qemu_cond_broadcast(&qemu_pause_cond);
1698 int vm_stop(RunState state)
1700 if (qemu_in_vcpu_thread()) {
1701 qemu_system_vmstop_request_prepare();
1702 qemu_system_vmstop_request(state);
1704 * FIXME: should not return to device code in case
1705 * vm_stop() has been requested.
1707 cpu_stop_current();
1708 return 0;
1711 return do_vm_stop(state);
1715 * Prepare for (re)starting the VM.
1716 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1717 * running or in case of an error condition), 0 otherwise.
1719 int vm_prepare_start(void)
1721 RunState requested;
1722 int res = 0;
1724 qemu_vmstop_requested(&requested);
1725 if (runstate_is_running() && requested == RUN_STATE__MAX) {
1726 return -1;
1729 /* Ensure that a STOP/RESUME pair of events is emitted if a
1730 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1731 * example, is documented to always be followed by
1732 * the STOP event.
1734 if (runstate_is_running()) {
1735 qapi_event_send_stop(&error_abort);
1736 res = -1;
1737 } else {
1738 replay_enable_events();
1739 cpu_enable_ticks();
1740 runstate_set(RUN_STATE_RUNNING);
1741 vm_state_notify(1, RUN_STATE_RUNNING);
1744 /* We are sending this now, but the CPUs will be resumed shortly afterwards */
1745 qapi_event_send_resume(&error_abort);
1746 return res;
1749 void vm_start(void)
1751 if (!vm_prepare_start()) {
1752 resume_all_vcpus();
1756 /* does a state transition even if the VM is already stopped;
1757 the current state is forgotten forever */
1758 int vm_stop_force_state(RunState state)
1760 if (runstate_is_running()) {
1761 return vm_stop(state);
1762 } else {
1763 runstate_set(state);
1765 bdrv_drain_all();
1766 /* Make sure to return an error if the flush in a previous vm_stop()
1767 * failed. */
1768 return bdrv_flush_all();
1772 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1774 /* XXX: implement xxx_cpu_list for targets that still miss it */
1775 #if defined(cpu_list)
1776 cpu_list(f, cpu_fprintf); /* deprecated */
1777 #else
1778 printf("Target ignores cpu selection\n");
1779 #endif
1782 CpuInfoList *qmp_query_cpus(Error **errp)
1784 CpuInfoList *head = NULL, *cur_item = NULL;
1785 CPUState *cpu;
1787 CPU_FOREACH(cpu) {
1788 CpuInfoList *info;
1789 #if defined(TARGET_I386)
1790 X86CPU *x86_cpu = X86_CPU(cpu);
1791 CPUX86State *env = &x86_cpu->env;
1792 #elif defined(TARGET_PPC)
1793 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1794 CPUPPCState *env = &ppc_cpu->env;
1795 #elif defined(TARGET_SPARC)
1796 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1797 CPUSPARCState *env = &sparc_cpu->env;
1798 #elif defined(TARGET_MIPS)
1799 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1800 CPUMIPSState *env = &mips_cpu->env;
1801 #elif defined(TARGET_TRICORE)
1802 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1803 CPUTriCoreState *env = &tricore_cpu->env;
1804 #endif
1806 cpu_synchronize_state(cpu);
1808 info = g_malloc0(sizeof(*info));
1809 info->value = g_malloc0(sizeof(*info->value));
1810 info->value->CPU = cpu->cpu_index;
1811 info->value->current = (cpu == first_cpu);
1812 info->value->halted = cpu->halted;
1813 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1814 info->value->thread_id = cpu->thread_id;
1815 #if defined(TARGET_I386)
1816 info->value->arch = CPU_INFO_ARCH_X86;
1817 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1818 #elif defined(TARGET_PPC)
1819 info->value->arch = CPU_INFO_ARCH_PPC;
1820 info->value->u.ppc.nip = env->nip;
1821 #elif defined(TARGET_SPARC)
1822 info->value->arch = CPU_INFO_ARCH_SPARC;
1823 info->value->u.q_sparc.pc = env->pc;
1824 info->value->u.q_sparc.npc = env->npc;
1825 #elif defined(TARGET_MIPS)
1826 info->value->arch = CPU_INFO_ARCH_MIPS;
1827 info->value->u.q_mips.PC = env->active_tc.PC;
1828 #elif defined(TARGET_TRICORE)
1829 info->value->arch = CPU_INFO_ARCH_TRICORE;
1830 info->value->u.tricore.PC = env->PC;
1831 #else
1832 info->value->arch = CPU_INFO_ARCH_OTHER;
1833 #endif
1835 /* XXX: waiting for the qapi to support GSList */
1836 if (!cur_item) {
1837 head = cur_item = info;
1838 } else {
1839 cur_item->next = info;
1840 cur_item = info;
1844 return head;
1847 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1848 bool has_cpu, int64_t cpu_index, Error **errp)
1850 FILE *f;
1851 uint32_t l;
1852 CPUState *cpu;
1853 uint8_t buf[1024];
1854 int64_t orig_addr = addr, orig_size = size;
1856 if (!has_cpu) {
1857 cpu_index = 0;
1860 cpu = qemu_get_cpu(cpu_index);
1861 if (cpu == NULL) {
1862 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1863 "a CPU number");
1864 return;
1867 f = fopen(filename, "wb");
1868 if (!f) {
1869 error_setg_file_open(errp, errno, filename);
1870 return;
1873 while (size != 0) {
1874 l = sizeof(buf);
1875 if (l > size)
1876 l = size;
1877 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1878 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1879 " specified", orig_addr, orig_size);
1880 goto exit;
1882 if (fwrite(buf, 1, l, f) != l) {
1883 error_setg(errp, QERR_IO_ERROR);
1884 goto exit;
1886 addr += l;
1887 size -= l;
1890 exit:
1891 fclose(f);
1894 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1895 Error **errp)
1897 FILE *f;
1898 uint32_t l;
1899 uint8_t buf[1024];
1901 f = fopen(filename, "wb");
1902 if (!f) {
1903 error_setg_file_open(errp, errno, filename);
1904 return;
1907 while (size != 0) {
1908 l = sizeof(buf);
1909 if (l > size)
1910 l = size;
1911 cpu_physical_memory_read(addr, buf, l);
1912 if (fwrite(buf, 1, l, f) != l) {
1913 error_setg(errp, QERR_IO_ERROR);
1914 goto exit;
1916 addr += l;
1917 size -= l;
1920 exit:
1921 fclose(f);
1924 void qmp_inject_nmi(Error **errp)
1926 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1929 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1931 if (!use_icount) {
1932 return;
1935 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1936 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1937 if (icount_align_option) {
1938 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1939 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1940 } else {
1941 cpu_fprintf(f, "Max guest delay NA\n");
1942 cpu_fprintf(f, "Max guest advance NA\n");