tcg: enable tb_lock() for SoftMMU
[qemu/ar7.git] / cpus.c
blobe165d18785def5ed7f61e4c499e2a276dc45bef6
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
29 #include "cpu.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "qmp-commands.h"
41 #include "exec/exec-all.h"
43 #include "qemu/thread.h"
44 #include "sysemu/cpus.h"
45 #include "sysemu/qtest.h"
46 #include "qemu/main-loop.h"
47 #include "qemu/bitmap.h"
48 #include "qemu/seqlock.h"
49 #include "tcg.h"
50 #include "qapi-event.h"
51 #include "hw/nmi.h"
52 #include "sysemu/replay.h"
54 #ifndef _WIN32
55 #include "qemu/compatfd.h"
56 #endif
58 #ifdef CONFIG_LINUX
60 #include <sys/prctl.h>
62 #ifndef PR_MCE_KILL
63 #define PR_MCE_KILL 33
64 #endif
66 #ifndef PR_MCE_KILL_SET
67 #define PR_MCE_KILL_SET 1
68 #endif
70 #ifndef PR_MCE_KILL_EARLY
71 #define PR_MCE_KILL_EARLY 1
72 #endif
74 #endif /* CONFIG_LINUX */
76 int64_t max_delay;
77 int64_t max_advance;
79 /* vcpu throttling controls */
80 static QEMUTimer *throttle_timer;
81 static unsigned int throttle_percentage;
83 #define CPU_THROTTLE_PCT_MIN 1
84 #define CPU_THROTTLE_PCT_MAX 99
85 #define CPU_THROTTLE_TIMESLICE_NS 10000000
87 bool cpu_is_stopped(CPUState *cpu)
89 return cpu->stopped || !runstate_is_running();
92 static bool cpu_thread_is_idle(CPUState *cpu)
94 if (cpu->stop || cpu->queued_work_first) {
95 return false;
97 if (cpu_is_stopped(cpu)) {
98 return true;
100 if (!cpu->halted || cpu_has_work(cpu) ||
101 kvm_halt_in_kernel()) {
102 return false;
104 return true;
107 static bool all_cpu_threads_idle(void)
109 CPUState *cpu;
111 CPU_FOREACH(cpu) {
112 if (!cpu_thread_is_idle(cpu)) {
113 return false;
116 return true;
119 /***********************************************************/
120 /* guest cycle counter */
122 /* Protected by TimersState seqlock */
124 static bool icount_sleep = true;
125 static int64_t vm_clock_warp_start = -1;
126 /* Conversion factor from emulated instructions to virtual clock ticks. */
127 static int icount_time_shift;
128 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
129 #define MAX_ICOUNT_SHIFT 10
131 static QEMUTimer *icount_rt_timer;
132 static QEMUTimer *icount_vm_timer;
133 static QEMUTimer *icount_warp_timer;
135 typedef struct TimersState {
136 /* Protected by BQL. */
137 int64_t cpu_ticks_prev;
138 int64_t cpu_ticks_offset;
140 /* cpu_clock_offset can be read out of BQL, so protect it with
141 * this lock.
143 QemuSeqLock vm_clock_seqlock;
144 int64_t cpu_clock_offset;
145 int32_t cpu_ticks_enabled;
146 int64_t dummy;
148 /* Compensate for varying guest execution speed. */
149 int64_t qemu_icount_bias;
150 /* Only written by TCG thread */
151 int64_t qemu_icount;
152 } TimersState;
154 static TimersState timers_state;
155 bool mttcg_enabled;
158 * We default to false if we know other options have been enabled
159 * which are currently incompatible with MTTCG. Otherwise when each
160 * guest (target) has been updated to support:
161 * - atomic instructions
162 * - memory ordering primitives (barriers)
163 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
165 * Once a guest architecture has been converted to the new primitives
166 * there are two remaining limitations to check.
168 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
169 * - The host must have a stronger memory order than the guest
171 * It may be possible in future to support strong guests on weak hosts
172 * but that will require tagging all load/stores in a guest with their
173 * implicit memory order requirements which would likely slow things
174 * down a lot.
/* Return true when every memory-ordering bit required by the guest
 * (TCG_GUEST_DEFAULT_MO) is also provided by the host
 * (TCG_TARGET_DEFAULT_MO).  If either macro is undefined the answer
 * is false.
 */
static bool check_tcg_memory_orders_compatible(void)
{
    bool compatible = false;

#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    compatible = (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#endif
    return compatible;
}
186 static bool default_mttcg_enabled(void)
188 QemuOpts *icount_opts = qemu_find_opts_singleton("icount");
189 const char *rr = qemu_opt_get(icount_opts, "rr");
191 if (rr || TCG_OVERSIZED_GUEST) {
192 return false;
193 } else {
194 #ifdef TARGET_SUPPORTS_MTTCG
195 return check_tcg_memory_orders_compatible();
196 #else
197 return false;
198 #endif
202 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
204 const char *t = qemu_opt_get(opts, "thread");
205 if (t) {
206 if (strcmp(t, "multi") == 0) {
207 if (TCG_OVERSIZED_GUEST) {
208 error_setg(errp, "No MTTCG when guest word size > hosts");
209 } else {
210 if (!check_tcg_memory_orders_compatible()) {
211 error_report("Guest expects a stronger memory ordering "
212 "than the host provides");
213 error_printf("This may cause strange/hard to debug errors");
215 mttcg_enabled = true;
217 } else if (strcmp(t, "single") == 0) {
218 mttcg_enabled = false;
219 } else {
220 error_setg(errp, "Invalid 'thread' setting %s", t);
222 } else {
223 mttcg_enabled = default_mttcg_enabled();
227 int64_t cpu_get_icount_raw(void)
229 int64_t icount;
230 CPUState *cpu = current_cpu;
232 icount = timers_state.qemu_icount;
233 if (cpu) {
234 if (!cpu->can_do_io) {
235 fprintf(stderr, "Bad icount read\n");
236 exit(1);
238 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
240 return icount;
243 /* Return the virtual CPU time, based on the instruction counter. */
244 static int64_t cpu_get_icount_locked(void)
246 int64_t icount = cpu_get_icount_raw();
247 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
250 int64_t cpu_get_icount(void)
252 int64_t icount;
253 unsigned start;
255 do {
256 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
257 icount = cpu_get_icount_locked();
258 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
260 return icount;
263 int64_t cpu_icount_to_ns(int64_t icount)
265 return icount << icount_time_shift;
268 /* return the time elapsed in VM between vm_start and vm_stop. Unless
269 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
270 * counter.
272 * Caller must hold the BQL
274 int64_t cpu_get_ticks(void)
276 int64_t ticks;
278 if (use_icount) {
279 return cpu_get_icount();
282 ticks = timers_state.cpu_ticks_offset;
283 if (timers_state.cpu_ticks_enabled) {
284 ticks += cpu_get_host_ticks();
287 if (timers_state.cpu_ticks_prev > ticks) {
288 /* Note: non increasing ticks may happen if the host uses
289 software suspend */
290 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
291 ticks = timers_state.cpu_ticks_prev;
294 timers_state.cpu_ticks_prev = ticks;
295 return ticks;
298 static int64_t cpu_get_clock_locked(void)
300 int64_t time;
302 time = timers_state.cpu_clock_offset;
303 if (timers_state.cpu_ticks_enabled) {
304 time += get_clock();
307 return time;
310 /* Return the monotonic time elapsed in VM, i.e.,
311 * the time between vm_start and vm_stop
313 int64_t cpu_get_clock(void)
315 int64_t ti;
316 unsigned start;
318 do {
319 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
320 ti = cpu_get_clock_locked();
321 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
323 return ti;
326 /* enable cpu_get_ticks()
327 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
329 void cpu_enable_ticks(void)
331 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
332 seqlock_write_begin(&timers_state.vm_clock_seqlock);
333 if (!timers_state.cpu_ticks_enabled) {
334 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
335 timers_state.cpu_clock_offset -= get_clock();
336 timers_state.cpu_ticks_enabled = 1;
338 seqlock_write_end(&timers_state.vm_clock_seqlock);
341 /* disable cpu_get_ticks() : the clock is stopped. You must not call
342 * cpu_get_ticks() after that.
343 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
345 void cpu_disable_ticks(void)
347 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
348 seqlock_write_begin(&timers_state.vm_clock_seqlock);
349 if (timers_state.cpu_ticks_enabled) {
350 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
351 timers_state.cpu_clock_offset = cpu_get_clock_locked();
352 timers_state.cpu_ticks_enabled = 0;
354 seqlock_write_end(&timers_state.vm_clock_seqlock);
357 /* Correlation between real and virtual time is always going to be
358 fairly approximate, so ignore small variation.
359 When the guest is idle real and virtual time will be aligned in
360 the IO wait loop. */
361 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
363 static void icount_adjust(void)
365 int64_t cur_time;
366 int64_t cur_icount;
367 int64_t delta;
369 /* Protected by TimersState mutex. */
370 static int64_t last_delta;
372 /* If the VM is not running, then do nothing. */
373 if (!runstate_is_running()) {
374 return;
377 seqlock_write_begin(&timers_state.vm_clock_seqlock);
378 cur_time = cpu_get_clock_locked();
379 cur_icount = cpu_get_icount_locked();
381 delta = cur_icount - cur_time;
382 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
383 if (delta > 0
384 && last_delta + ICOUNT_WOBBLE < delta * 2
385 && icount_time_shift > 0) {
386 /* The guest is getting too far ahead. Slow time down. */
387 icount_time_shift--;
389 if (delta < 0
390 && last_delta - ICOUNT_WOBBLE > delta * 2
391 && icount_time_shift < MAX_ICOUNT_SHIFT) {
392 /* The guest is getting too far behind. Speed time up. */
393 icount_time_shift++;
395 last_delta = delta;
396 timers_state.qemu_icount_bias = cur_icount
397 - (timers_state.qemu_icount << icount_time_shift);
398 seqlock_write_end(&timers_state.vm_clock_seqlock);
401 static void icount_adjust_rt(void *opaque)
403 timer_mod(icount_rt_timer,
404 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
405 icount_adjust();
408 static void icount_adjust_vm(void *opaque)
410 timer_mod(icount_vm_timer,
411 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
412 NANOSECONDS_PER_SECOND / 10);
413 icount_adjust();
416 static int64_t qemu_icount_round(int64_t count)
418 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
421 static void icount_warp_rt(void)
423 unsigned seq;
424 int64_t warp_start;
426 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
427 * changes from -1 to another value, so the race here is okay.
429 do {
430 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
431 warp_start = vm_clock_warp_start;
432 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
434 if (warp_start == -1) {
435 return;
438 seqlock_write_begin(&timers_state.vm_clock_seqlock);
439 if (runstate_is_running()) {
440 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
441 cpu_get_clock_locked());
442 int64_t warp_delta;
444 warp_delta = clock - vm_clock_warp_start;
445 if (use_icount == 2) {
447 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
448 * far ahead of real time.
450 int64_t cur_icount = cpu_get_icount_locked();
451 int64_t delta = clock - cur_icount;
452 warp_delta = MIN(warp_delta, delta);
454 timers_state.qemu_icount_bias += warp_delta;
456 vm_clock_warp_start = -1;
457 seqlock_write_end(&timers_state.vm_clock_seqlock);
459 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
460 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
/* Callback of icount_warp_timer; simply completes the warp.
 * @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
472 void qtest_clock_warp(int64_t dest)
474 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
475 AioContext *aio_context;
476 assert(qtest_enabled());
477 aio_context = qemu_get_aio_context();
478 while (clock < dest) {
479 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
480 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
482 seqlock_write_begin(&timers_state.vm_clock_seqlock);
483 timers_state.qemu_icount_bias += warp;
484 seqlock_write_end(&timers_state.vm_clock_seqlock);
486 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
487 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
488 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
490 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
493 void qemu_start_warp_timer(void)
495 int64_t clock;
496 int64_t deadline;
498 if (!use_icount) {
499 return;
502 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
503 * do not fire, so computing the deadline does not make sense.
505 if (!runstate_is_running()) {
506 return;
509 /* warp clock deterministically in record/replay mode */
510 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
511 return;
514 if (!all_cpu_threads_idle()) {
515 return;
518 if (qtest_enabled()) {
519 /* When testing, qtest commands advance icount. */
520 return;
523 /* We want to use the earliest deadline from ALL vm_clocks */
524 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
525 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
526 if (deadline < 0) {
527 static bool notified;
528 if (!icount_sleep && !notified) {
529 error_report("WARNING: icount sleep disabled and no active timers");
530 notified = true;
532 return;
535 if (deadline > 0) {
537 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
538 * sleep. Otherwise, the CPU might be waiting for a future timer
539 * interrupt to wake it up, but the interrupt never comes because
540 * the vCPU isn't running any insns and thus doesn't advance the
541 * QEMU_CLOCK_VIRTUAL.
543 if (!icount_sleep) {
545 * We never let VCPUs sleep in no sleep icount mode.
546 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
547 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
548 * It is useful when we want a deterministic execution time,
549 * isolated from host latencies.
551 seqlock_write_begin(&timers_state.vm_clock_seqlock);
552 timers_state.qemu_icount_bias += deadline;
553 seqlock_write_end(&timers_state.vm_clock_seqlock);
554 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
555 } else {
557 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
558 * "real" time, (related to the time left until the next event) has
559 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
560 * This avoids that the warps are visible externally; for example,
561 * you will not be sending network packets continuously instead of
562 * every 100ms.
564 seqlock_write_begin(&timers_state.vm_clock_seqlock);
565 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
566 vm_clock_warp_start = clock;
568 seqlock_write_end(&timers_state.vm_clock_seqlock);
569 timer_mod_anticipate(icount_warp_timer, clock + deadline);
571 } else if (deadline == 0) {
572 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
576 static void qemu_account_warp_timer(void)
578 if (!use_icount || !icount_sleep) {
579 return;
582 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
583 * do not fire, so computing the deadline does not make sense.
585 if (!runstate_is_running()) {
586 return;
589 /* warp clock deterministically in record/replay mode */
590 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
591 return;
594 timer_del(icount_warp_timer);
595 icount_warp_rt();
598 static bool icount_state_needed(void *opaque)
600 return use_icount;
604 * This is a subsection for icount migration.
606 static const VMStateDescription icount_vmstate_timers = {
607 .name = "timer/icount",
608 .version_id = 1,
609 .minimum_version_id = 1,
610 .needed = icount_state_needed,
611 .fields = (VMStateField[]) {
612 VMSTATE_INT64(qemu_icount_bias, TimersState),
613 VMSTATE_INT64(qemu_icount, TimersState),
614 VMSTATE_END_OF_LIST()
618 static const VMStateDescription vmstate_timers = {
619 .name = "timer",
620 .version_id = 2,
621 .minimum_version_id = 1,
622 .fields = (VMStateField[]) {
623 VMSTATE_INT64(cpu_ticks_offset, TimersState),
624 VMSTATE_INT64(dummy, TimersState),
625 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
626 VMSTATE_END_OF_LIST()
628 .subsections = (const VMStateDescription*[]) {
629 &icount_vmstate_timers,
630 NULL
634 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
636 double pct;
637 double throttle_ratio;
638 long sleeptime_ns;
640 if (!cpu_throttle_get_percentage()) {
641 return;
644 pct = (double)cpu_throttle_get_percentage()/100;
645 throttle_ratio = pct / (1 - pct);
646 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
648 qemu_mutex_unlock_iothread();
649 atomic_set(&cpu->throttle_thread_scheduled, 0);
650 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
651 qemu_mutex_lock_iothread();
654 static void cpu_throttle_timer_tick(void *opaque)
656 CPUState *cpu;
657 double pct;
659 /* Stop the timer if needed */
660 if (!cpu_throttle_get_percentage()) {
661 return;
663 CPU_FOREACH(cpu) {
664 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
665 async_run_on_cpu(cpu, cpu_throttle_thread,
666 RUN_ON_CPU_NULL);
670 pct = (double)cpu_throttle_get_percentage()/100;
671 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
672 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
675 void cpu_throttle_set(int new_throttle_pct)
677 /* Ensure throttle percentage is within valid range */
678 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
679 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
681 atomic_set(&throttle_percentage, new_throttle_pct);
683 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
684 CPU_THROTTLE_TIMESLICE_NS);
687 void cpu_throttle_stop(void)
689 atomic_set(&throttle_percentage, 0);
/* Return true while the throttle percentage is non-zero. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
697 int cpu_throttle_get_percentage(void)
699 return atomic_read(&throttle_percentage);
702 void cpu_ticks_init(void)
704 seqlock_init(&timers_state.vm_clock_seqlock);
705 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
706 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
707 cpu_throttle_timer_tick, NULL);
710 void configure_icount(QemuOpts *opts, Error **errp)
712 const char *option;
713 char *rem_str = NULL;
715 option = qemu_opt_get(opts, "shift");
716 if (!option) {
717 if (qemu_opt_get(opts, "align") != NULL) {
718 error_setg(errp, "Please specify shift option when using align");
720 return;
723 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
724 if (icount_sleep) {
725 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
726 icount_timer_cb, NULL);
729 icount_align_option = qemu_opt_get_bool(opts, "align", false);
731 if (icount_align_option && !icount_sleep) {
732 error_setg(errp, "align=on and sleep=off are incompatible");
734 if (strcmp(option, "auto") != 0) {
735 errno = 0;
736 icount_time_shift = strtol(option, &rem_str, 0);
737 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
738 error_setg(errp, "icount: Invalid shift value");
740 use_icount = 1;
741 return;
742 } else if (icount_align_option) {
743 error_setg(errp, "shift=auto and align=on are incompatible");
744 } else if (!icount_sleep) {
745 error_setg(errp, "shift=auto and sleep=off are incompatible");
748 use_icount = 2;
750 /* 125MIPS seems a reasonable initial guess at the guest speed.
751 It will be corrected fairly quickly anyway. */
752 icount_time_shift = 3;
754 /* Have both realtime and virtual time triggers for speed adjustment.
755 The realtime trigger catches emulated time passing too slowly,
756 the virtual time trigger catches emulated time passing too fast.
757 Realtime triggers occur even when idle, so use them less frequently
758 than VM triggers. */
759 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
760 icount_adjust_rt, NULL);
761 timer_mod(icount_rt_timer,
762 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
763 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
764 icount_adjust_vm, NULL);
765 timer_mod(icount_vm_timer,
766 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
767 NANOSECONDS_PER_SECOND / 10);
770 /***********************************************************/
771 /* TCG vCPU kick timer
773 * The kick timer is responsible for moving single threaded vCPU
774 * emulation on to the next vCPU. If more than one vCPU is running a
775 * timer event will force a cpu->exit so the next vCPU can get
776 * scheduled.
778 * The timer is removed if all vCPUs are idle and restarted again once
779 * idleness is complete.
782 static QEMUTimer *tcg_kick_vcpu_timer;
783 static CPUState *tcg_current_rr_cpu;
785 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
787 static inline int64_t qemu_tcg_next_kick(void)
789 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
792 /* Kick the currently round-robin scheduled vCPU */
793 static void qemu_cpu_kick_rr_cpu(void)
795 CPUState *cpu;
796 do {
797 cpu = atomic_mb_read(&tcg_current_rr_cpu);
798 if (cpu) {
799 cpu_exit(cpu);
801 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
804 static void kick_tcg_thread(void *opaque)
806 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
807 qemu_cpu_kick_rr_cpu();
810 static void start_tcg_kick_timer(void)
812 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
813 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
814 kick_tcg_thread, NULL);
815 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
819 static void stop_tcg_kick_timer(void)
821 if (tcg_kick_vcpu_timer) {
822 timer_del(tcg_kick_vcpu_timer);
823 tcg_kick_vcpu_timer = NULL;
827 /***********************************************************/
828 void hw_error(const char *fmt, ...)
830 va_list ap;
831 CPUState *cpu;
833 va_start(ap, fmt);
834 fprintf(stderr, "qemu: hardware error: ");
835 vfprintf(stderr, fmt, ap);
836 fprintf(stderr, "\n");
837 CPU_FOREACH(cpu) {
838 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
839 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
841 va_end(ap);
842 abort();
845 void cpu_synchronize_all_states(void)
847 CPUState *cpu;
849 CPU_FOREACH(cpu) {
850 cpu_synchronize_state(cpu);
854 void cpu_synchronize_all_post_reset(void)
856 CPUState *cpu;
858 CPU_FOREACH(cpu) {
859 cpu_synchronize_post_reset(cpu);
863 void cpu_synchronize_all_post_init(void)
865 CPUState *cpu;
867 CPU_FOREACH(cpu) {
868 cpu_synchronize_post_init(cpu);
872 static int do_vm_stop(RunState state)
874 int ret = 0;
876 if (runstate_is_running()) {
877 cpu_disable_ticks();
878 pause_all_vcpus();
879 runstate_set(state);
880 vm_state_notify(0, state);
881 qapi_event_send_stop(&error_abort);
884 bdrv_drain_all();
885 replay_disable_events();
886 ret = bdrv_flush_all();
888 return ret;
891 static bool cpu_can_run(CPUState *cpu)
893 if (cpu->stop) {
894 return false;
896 if (cpu_is_stopped(cpu)) {
897 return false;
899 return true;
902 static void cpu_handle_guest_debug(CPUState *cpu)
904 gdb_set_stop_cpu(cpu);
905 qemu_system_debug_request();
906 cpu->stopped = true;
909 #ifdef CONFIG_LINUX
910 static void sigbus_reraise(void)
912 sigset_t set;
913 struct sigaction action;
915 memset(&action, 0, sizeof(action));
916 action.sa_handler = SIG_DFL;
917 if (!sigaction(SIGBUS, &action, NULL)) {
918 raise(SIGBUS);
919 sigemptyset(&set);
920 sigaddset(&set, SIGBUS);
921 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
923 perror("Failed to re-raise SIGBUS!\n");
924 abort();
927 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
928 void *ctx)
930 if (kvm_on_sigbus(siginfo->ssi_code,
931 (void *)(intptr_t)siginfo->ssi_addr)) {
932 sigbus_reraise();
936 static void qemu_init_sigbus(void)
938 struct sigaction action;
940 memset(&action, 0, sizeof(action));
941 action.sa_flags = SA_SIGINFO;
942 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
943 sigaction(SIGBUS, &action, NULL);
945 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
948 static void qemu_kvm_eat_signals(CPUState *cpu)
950 struct timespec ts = { 0, 0 };
951 siginfo_t siginfo;
952 sigset_t waitset;
953 sigset_t chkset;
954 int r;
956 sigemptyset(&waitset);
957 sigaddset(&waitset, SIG_IPI);
958 sigaddset(&waitset, SIGBUS);
960 do {
961 r = sigtimedwait(&waitset, &siginfo, &ts);
962 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
963 perror("sigtimedwait");
964 exit(1);
967 switch (r) {
968 case SIGBUS:
969 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
970 sigbus_reraise();
972 break;
973 default:
974 break;
977 r = sigpending(&chkset);
978 if (r == -1) {
979 perror("sigpending");
980 exit(1);
982 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
985 #else /* !CONFIG_LINUX */
/* Non-Linux build: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
}
991 static void qemu_kvm_eat_signals(CPUState *cpu)
994 #endif /* !CONFIG_LINUX */
996 #ifndef _WIN32
/* Handler that deliberately does nothing; it is installed for SIG_IPI
 * by qemu_kvm_init_cpu_signals().  @sig is ignored.
 */
static void dummy_signal(int sig)
{
}
1001 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
1003 int r;
1004 sigset_t set;
1005 struct sigaction sigact;
1007 memset(&sigact, 0, sizeof(sigact));
1008 sigact.sa_handler = dummy_signal;
1009 sigaction(SIG_IPI, &sigact, NULL);
1011 pthread_sigmask(SIG_BLOCK, NULL, &set);
1012 sigdelset(&set, SIG_IPI);
1013 sigdelset(&set, SIGBUS);
1014 r = kvm_set_signal_mask(cpu, &set);
1015 if (r) {
1016 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
1017 exit(1);
1021 #else /* _WIN32 */
1022 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
1024 abort();
1026 #endif /* _WIN32 */
1028 static QemuMutex qemu_global_mutex;
1030 static QemuThread io_thread;
1032 /* cpu creation */
1033 static QemuCond qemu_cpu_cond;
1034 /* system init */
1035 static QemuCond qemu_pause_cond;
1037 void qemu_init_cpu_loop(void)
1039 qemu_init_sigbus();
1040 qemu_cond_init(&qemu_cpu_cond);
1041 qemu_cond_init(&qemu_pause_cond);
1042 qemu_mutex_init(&qemu_global_mutex);
1044 qemu_thread_get_self(&io_thread);
1047 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1049 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1052 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1054 if (kvm_destroy_vcpu(cpu) < 0) {
1055 error_report("kvm_destroy_vcpu failed");
1056 exit(EXIT_FAILURE);
1060 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1064 static void qemu_wait_io_event_common(CPUState *cpu)
1066 if (cpu->stop) {
1067 cpu->stop = false;
1068 cpu->stopped = true;
1069 qemu_cond_broadcast(&qemu_pause_cond);
1071 process_queued_cpu_work(cpu);
1072 cpu->thread_kicked = false;
1075 static void qemu_tcg_wait_io_event(CPUState *cpu)
1077 while (all_cpu_threads_idle()) {
1078 stop_tcg_kick_timer();
1079 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1082 start_tcg_kick_timer();
1084 CPU_FOREACH(cpu) {
1085 qemu_wait_io_event_common(cpu);
1089 static void qemu_kvm_wait_io_event(CPUState *cpu)
1091 while (cpu_thread_is_idle(cpu)) {
1092 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1095 qemu_kvm_eat_signals(cpu);
1096 qemu_wait_io_event_common(cpu);
1099 static void *qemu_kvm_cpu_thread_fn(void *arg)
1101 CPUState *cpu = arg;
1102 int r;
1104 rcu_register_thread();
1106 qemu_mutex_lock_iothread();
1107 qemu_thread_get_self(cpu->thread);
1108 cpu->thread_id = qemu_get_thread_id();
1109 cpu->can_do_io = 1;
1110 current_cpu = cpu;
1112 r = kvm_init_vcpu(cpu);
1113 if (r < 0) {
1114 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1115 exit(1);
1118 qemu_kvm_init_cpu_signals(cpu);
1120 /* signal CPU creation */
1121 cpu->created = true;
1122 qemu_cond_signal(&qemu_cpu_cond);
1124 do {
1125 if (cpu_can_run(cpu)) {
1126 r = kvm_cpu_exec(cpu);
1127 if (r == EXCP_DEBUG) {
1128 cpu_handle_guest_debug(cpu);
1131 qemu_kvm_wait_io_event(cpu);
1132 } while (!cpu->unplug || cpu_can_run(cpu));
1134 qemu_kvm_destroy_vcpu(cpu);
1135 cpu->created = false;
1136 qemu_cond_signal(&qemu_cpu_cond);
1137 qemu_mutex_unlock_iothread();
1138 return NULL;
1141 static void *qemu_dummy_cpu_thread_fn(void *arg)
1143 #ifdef _WIN32
1144 fprintf(stderr, "qtest is not supported under Windows\n");
1145 exit(1);
1146 #else
1147 CPUState *cpu = arg;
1148 sigset_t waitset;
1149 int r;
1151 rcu_register_thread();
1153 qemu_mutex_lock_iothread();
1154 qemu_thread_get_self(cpu->thread);
1155 cpu->thread_id = qemu_get_thread_id();
1156 cpu->can_do_io = 1;
1158 sigemptyset(&waitset);
1159 sigaddset(&waitset, SIG_IPI);
1161 /* signal CPU creation */
1162 cpu->created = true;
1163 qemu_cond_signal(&qemu_cpu_cond);
1165 current_cpu = cpu;
1166 while (1) {
1167 current_cpu = NULL;
1168 qemu_mutex_unlock_iothread();
1169 do {
1170 int sig;
1171 r = sigwait(&waitset, &sig);
1172 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1173 if (r == -1) {
1174 perror("sigwait");
1175 exit(1);
1177 qemu_mutex_lock_iothread();
1178 current_cpu = cpu;
1179 qemu_wait_io_event_common(cpu);
1182 return NULL;
1183 #endif
1186 static int64_t tcg_get_icount_limit(void)
1188 int64_t deadline;
1190 if (replay_mode != REPLAY_MODE_PLAY) {
1191 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1193 /* Maintain prior (possibly buggy) behaviour where if no deadline
1194 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1195 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1196 * nanoseconds.
1198 if ((deadline < 0) || (deadline > INT32_MAX)) {
1199 deadline = INT32_MAX;
1202 return qemu_icount_round(deadline);
1203 } else {
1204 return replay_get_instructions();
1208 static void handle_icount_deadline(void)
1210 if (use_icount) {
1211 int64_t deadline =
1212 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1214 if (deadline == 0) {
1215 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1220 static int tcg_cpu_exec(CPUState *cpu)
1222 int ret;
1223 #ifdef CONFIG_PROFILER
1224 int64_t ti;
1225 #endif
1227 #ifdef CONFIG_PROFILER
1228 ti = profile_getclock();
1229 #endif
1230 if (use_icount) {
1231 int64_t count;
1232 int decr;
1233 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1234 + cpu->icount_extra);
1235 cpu->icount_decr.u16.low = 0;
1236 cpu->icount_extra = 0;
1237 count = tcg_get_icount_limit();
1238 timers_state.qemu_icount += count;
1239 decr = (count > 0xffff) ? 0xffff : count;
1240 count -= decr;
1241 cpu->icount_decr.u16.low = decr;
1242 cpu->icount_extra = count;
1244 qemu_mutex_unlock_iothread();
1245 cpu_exec_start(cpu);
1246 ret = cpu_exec(cpu);
1247 cpu_exec_end(cpu);
1248 qemu_mutex_lock_iothread();
1249 #ifdef CONFIG_PROFILER
1250 tcg_time += profile_getclock() - ti;
1251 #endif
1252 if (use_icount) {
1253 /* Fold pending instructions back into the
1254 instruction counter, and clear the interrupt flag. */
1255 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1256 + cpu->icount_extra);
1257 cpu->icount_decr.u32 = 0;
1258 cpu->icount_extra = 0;
1259 replay_account_executed_instructions();
1261 return ret;
1264 /* Destroy any remaining vCPUs which have been unplugged and have
1265 * finished running
1267 static void deal_with_unplugged_cpus(void)
1269 CPUState *cpu;
1271 CPU_FOREACH(cpu) {
1272 if (cpu->unplug && !cpu_can_run(cpu)) {
1273 qemu_tcg_destroy_vcpu(cpu);
1274 cpu->created = false;
1275 qemu_cond_signal(&qemu_cpu_cond);
1276 break;
1281 /* Single-threaded TCG
1283 * In the single-threaded case each vCPU is simulated in turn. If
1284 * there is more than a single vCPU we create a simple timer to kick
1285 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1286 * This is done explicitly rather than relying on side-effects
1287 * elsewhere.
1290 static void *qemu_tcg_cpu_thread_fn(void *arg)
1292 CPUState *cpu = arg;
1294 rcu_register_thread();
1296 qemu_mutex_lock_iothread();
1297 qemu_thread_get_self(cpu->thread);
1299 CPU_FOREACH(cpu) {
1300 cpu->thread_id = qemu_get_thread_id();
1301 cpu->created = true;
1302 cpu->can_do_io = 1;
1304 qemu_cond_signal(&qemu_cpu_cond);
1306 /* wait for initial kick-off after machine start */
1307 while (first_cpu->stopped) {
1308 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1310 /* process any pending work */
1311 CPU_FOREACH(cpu) {
1312 qemu_wait_io_event_common(cpu);
1316 start_tcg_kick_timer();
1318 cpu = first_cpu;
1320 /* process any pending work */
1321 cpu->exit_request = 1;
1323 while (1) {
1324 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1325 qemu_account_warp_timer();
1327 if (!cpu) {
1328 cpu = first_cpu;
1331 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1333 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1335 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1336 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1338 if (cpu_can_run(cpu)) {
1339 int r;
1340 r = tcg_cpu_exec(cpu);
1341 if (r == EXCP_DEBUG) {
1342 cpu_handle_guest_debug(cpu);
1343 break;
1345 } else if (cpu->stop || cpu->stopped) {
1346 if (cpu->unplug) {
1347 cpu = CPU_NEXT(cpu);
1349 break;
1352 cpu = CPU_NEXT(cpu);
1353 } /* while (cpu && !cpu->exit_request).. */
1355 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1356 atomic_set(&tcg_current_rr_cpu, NULL);
1358 if (cpu && cpu->exit_request) {
1359 atomic_mb_set(&cpu->exit_request, 0);
1362 handle_icount_deadline();
1364 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
1365 deal_with_unplugged_cpus();
1368 return NULL;
1371 static void *qemu_hax_cpu_thread_fn(void *arg)
1373 CPUState *cpu = arg;
1374 int r;
1375 qemu_thread_get_self(cpu->thread);
1376 qemu_mutex_lock(&qemu_global_mutex);
1378 cpu->thread_id = qemu_get_thread_id();
1379 cpu->created = true;
1380 cpu->halted = 0;
1381 current_cpu = cpu;
1383 hax_init_vcpu(cpu);
1384 qemu_cond_signal(&qemu_cpu_cond);
1386 while (1) {
1387 if (cpu_can_run(cpu)) {
1388 r = hax_smp_cpu_exec(cpu);
1389 if (r == EXCP_DEBUG) {
1390 cpu_handle_guest_debug(cpu);
1394 while (cpu_thread_is_idle(cpu)) {
1395 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1397 #ifdef _WIN32
1398 SleepEx(0, TRUE);
1399 #endif
1400 qemu_wait_io_event_common(cpu);
1402 return NULL;
#ifdef _WIN32
/*
 * Deliberately empty APC routine.  qemu_cpu_kick_thread() queues it on a
 * vCPU thread with QueueUserAPC(); only the delivery matters, the routine
 * itself has nothing to do.
 */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif
1411 static void qemu_cpu_kick_thread(CPUState *cpu)
1413 #ifndef _WIN32
1414 int err;
1416 if (cpu->thread_kicked) {
1417 return;
1419 cpu->thread_kicked = true;
1420 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1421 if (err) {
1422 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1423 exit(1);
1425 #else /* _WIN32 */
1426 if (!qemu_cpu_is_self(cpu)) {
1427 if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1428 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1429 __func__, GetLastError());
1430 exit(1);
1433 #endif
1436 void qemu_cpu_kick(CPUState *cpu)
1438 qemu_cond_broadcast(cpu->halt_cond);
1439 if (tcg_enabled()) {
1440 cpu_exit(cpu);
1441 /* Also ensure current RR cpu is kicked */
1442 qemu_cpu_kick_rr_cpu();
1443 } else {
1444 if (hax_enabled()) {
1446 * FIXME: race condition with the exit_request check in
1447 * hax_vcpu_hax_exec
1449 cpu->exit_request = 1;
1451 qemu_cpu_kick_thread(cpu);
1455 void qemu_cpu_kick_self(void)
1457 assert(current_cpu);
1458 qemu_cpu_kick_thread(current_cpu);
1461 bool qemu_cpu_is_self(CPUState *cpu)
1463 return qemu_thread_is_self(cpu->thread);
1466 bool qemu_in_vcpu_thread(void)
1468 return current_cpu && qemu_cpu_is_self(current_cpu);
/*
 * Per-thread flag: set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread().  Static storage starts out as false.
 */
static __thread bool iothread_locked;

/* Report the calling thread's iothread_locked flag. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1478 void qemu_mutex_lock_iothread(void)
1480 g_assert(!qemu_mutex_iothread_locked());
1481 qemu_mutex_lock(&qemu_global_mutex);
1482 iothread_locked = true;
1485 void qemu_mutex_unlock_iothread(void)
1487 g_assert(qemu_mutex_iothread_locked());
1488 iothread_locked = false;
1489 qemu_mutex_unlock(&qemu_global_mutex);
1492 static bool all_vcpus_paused(void)
1494 CPUState *cpu;
1496 CPU_FOREACH(cpu) {
1497 if (!cpu->stopped) {
1498 return false;
1502 return true;
1505 void pause_all_vcpus(void)
1507 CPUState *cpu;
1509 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1510 CPU_FOREACH(cpu) {
1511 cpu->stop = true;
1512 qemu_cpu_kick(cpu);
1515 if (qemu_in_vcpu_thread()) {
1516 cpu_stop_current();
1517 if (!kvm_enabled()) {
1518 CPU_FOREACH(cpu) {
1519 cpu->stop = false;
1520 cpu->stopped = true;
1522 return;
1526 while (!all_vcpus_paused()) {
1527 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1528 CPU_FOREACH(cpu) {
1529 qemu_cpu_kick(cpu);
1534 void cpu_resume(CPUState *cpu)
1536 cpu->stop = false;
1537 cpu->stopped = false;
1538 qemu_cpu_kick(cpu);
1541 void resume_all_vcpus(void)
1543 CPUState *cpu;
1545 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1546 CPU_FOREACH(cpu) {
1547 cpu_resume(cpu);
1551 void cpu_remove(CPUState *cpu)
1553 cpu->stop = true;
1554 cpu->unplug = true;
1555 qemu_cpu_kick(cpu);
1558 void cpu_remove_sync(CPUState *cpu)
1560 cpu_remove(cpu);
1561 while (cpu->created) {
1562 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
/* Size in bytes of the temporary buffers used to build vCPU thread names */
#define VCPU_THREAD_NAME_SIZE 16
1569 static void qemu_tcg_init_vcpu(CPUState *cpu)
1571 char thread_name[VCPU_THREAD_NAME_SIZE];
1572 static QemuCond *tcg_halt_cond;
1573 static QemuThread *tcg_cpu_thread;
1575 /* share a single thread for all cpus with TCG */
1576 if (!tcg_cpu_thread) {
1577 cpu->thread = g_malloc0(sizeof(QemuThread));
1578 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1579 qemu_cond_init(cpu->halt_cond);
1580 tcg_halt_cond = cpu->halt_cond;
1581 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1582 cpu->cpu_index);
1583 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1584 cpu, QEMU_THREAD_JOINABLE);
1585 #ifdef _WIN32
1586 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1587 #endif
1588 while (!cpu->created) {
1589 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1591 tcg_cpu_thread = cpu->thread;
1592 } else {
1593 cpu->thread = tcg_cpu_thread;
1594 cpu->halt_cond = tcg_halt_cond;
1598 static void qemu_hax_start_vcpu(CPUState *cpu)
1600 char thread_name[VCPU_THREAD_NAME_SIZE];
1602 cpu->thread = g_malloc0(sizeof(QemuThread));
1603 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1604 qemu_cond_init(cpu->halt_cond);
1606 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1607 cpu->cpu_index);
1608 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1609 cpu, QEMU_THREAD_JOINABLE);
1610 #ifdef _WIN32
1611 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1612 #endif
1613 while (!cpu->created) {
1614 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1618 static void qemu_kvm_start_vcpu(CPUState *cpu)
1620 char thread_name[VCPU_THREAD_NAME_SIZE];
1622 cpu->thread = g_malloc0(sizeof(QemuThread));
1623 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1624 qemu_cond_init(cpu->halt_cond);
1625 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1626 cpu->cpu_index);
1627 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1628 cpu, QEMU_THREAD_JOINABLE);
1629 while (!cpu->created) {
1630 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1634 static void qemu_dummy_start_vcpu(CPUState *cpu)
1636 char thread_name[VCPU_THREAD_NAME_SIZE];
1638 cpu->thread = g_malloc0(sizeof(QemuThread));
1639 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1640 qemu_cond_init(cpu->halt_cond);
1641 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1642 cpu->cpu_index);
1643 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1644 QEMU_THREAD_JOINABLE);
1645 while (!cpu->created) {
1646 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1650 void qemu_init_vcpu(CPUState *cpu)
1652 cpu->nr_cores = smp_cores;
1653 cpu->nr_threads = smp_threads;
1654 cpu->stopped = true;
1656 if (!cpu->as) {
1657 /* If the target cpu hasn't set up any address spaces itself,
1658 * give it the default one.
1660 AddressSpace *as = address_space_init_shareable(cpu->memory,
1661 "cpu-memory");
1662 cpu->num_ases = 1;
1663 cpu_address_space_init(cpu, as, 0);
1666 if (kvm_enabled()) {
1667 qemu_kvm_start_vcpu(cpu);
1668 } else if (hax_enabled()) {
1669 qemu_hax_start_vcpu(cpu);
1670 } else if (tcg_enabled()) {
1671 qemu_tcg_init_vcpu(cpu);
1672 } else {
1673 qemu_dummy_start_vcpu(cpu);
1677 void cpu_stop_current(void)
1679 if (current_cpu) {
1680 current_cpu->stop = false;
1681 current_cpu->stopped = true;
1682 cpu_exit(current_cpu);
1683 qemu_cond_broadcast(&qemu_pause_cond);
1687 int vm_stop(RunState state)
1689 if (qemu_in_vcpu_thread()) {
1690 qemu_system_vmstop_request_prepare();
1691 qemu_system_vmstop_request(state);
1693 * FIXME: should not return to device code in case
1694 * vm_stop() has been requested.
1696 cpu_stop_current();
1697 return 0;
1700 return do_vm_stop(state);
1704 * Prepare for (re)starting the VM.
1705 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1706 * running or in case of an error condition), 0 otherwise.
1708 int vm_prepare_start(void)
1710 RunState requested;
1711 int res = 0;
1713 qemu_vmstop_requested(&requested);
1714 if (runstate_is_running() && requested == RUN_STATE__MAX) {
1715 return -1;
1718 /* Ensure that a STOP/RESUME pair of events is emitted if a
1719 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1720 * example, according to documentation is always followed by
1721 * the STOP event.
1723 if (runstate_is_running()) {
1724 qapi_event_send_stop(&error_abort);
1725 res = -1;
1726 } else {
1727 replay_enable_events();
1728 cpu_enable_ticks();
1729 runstate_set(RUN_STATE_RUNNING);
1730 vm_state_notify(1, RUN_STATE_RUNNING);
1733 /* We are sending this now, but the CPUs will be resumed shortly later */
1734 qapi_event_send_resume(&error_abort);
1735 return res;
/* Start the VM: resume all vCPUs if vm_prepare_start() returns 0. */
void vm_start(void)
{
    if (vm_prepare_start() == 0) {
        resume_all_vcpus();
    }
}
1745 /* does a state transition even if the VM is already stopped,
1746 current state is forgotten forever */
1747 int vm_stop_force_state(RunState state)
1749 if (runstate_is_running()) {
1750 return vm_stop(state);
1751 } else {
1752 runstate_set(state);
1754 bdrv_drain_all();
1755 /* Make sure to return an error if the flush in a previous vm_stop()
1756 * failed. */
1757 return bdrv_flush_all();
1761 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1763 /* XXX: implement xxx_cpu_list for targets that still miss it */
1764 #if defined(cpu_list)
1765 cpu_list(f, cpu_fprintf);
1766 #endif
1769 CpuInfoList *qmp_query_cpus(Error **errp)
1771 CpuInfoList *head = NULL, *cur_item = NULL;
1772 CPUState *cpu;
1774 CPU_FOREACH(cpu) {
1775 CpuInfoList *info;
1776 #if defined(TARGET_I386)
1777 X86CPU *x86_cpu = X86_CPU(cpu);
1778 CPUX86State *env = &x86_cpu->env;
1779 #elif defined(TARGET_PPC)
1780 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1781 CPUPPCState *env = &ppc_cpu->env;
1782 #elif defined(TARGET_SPARC)
1783 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1784 CPUSPARCState *env = &sparc_cpu->env;
1785 #elif defined(TARGET_MIPS)
1786 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1787 CPUMIPSState *env = &mips_cpu->env;
1788 #elif defined(TARGET_TRICORE)
1789 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1790 CPUTriCoreState *env = &tricore_cpu->env;
1791 #endif
1793 cpu_synchronize_state(cpu);
1795 info = g_malloc0(sizeof(*info));
1796 info->value = g_malloc0(sizeof(*info->value));
1797 info->value->CPU = cpu->cpu_index;
1798 info->value->current = (cpu == first_cpu);
1799 info->value->halted = cpu->halted;
1800 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1801 info->value->thread_id = cpu->thread_id;
1802 #if defined(TARGET_I386)
1803 info->value->arch = CPU_INFO_ARCH_X86;
1804 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1805 #elif defined(TARGET_PPC)
1806 info->value->arch = CPU_INFO_ARCH_PPC;
1807 info->value->u.ppc.nip = env->nip;
1808 #elif defined(TARGET_SPARC)
1809 info->value->arch = CPU_INFO_ARCH_SPARC;
1810 info->value->u.q_sparc.pc = env->pc;
1811 info->value->u.q_sparc.npc = env->npc;
1812 #elif defined(TARGET_MIPS)
1813 info->value->arch = CPU_INFO_ARCH_MIPS;
1814 info->value->u.q_mips.PC = env->active_tc.PC;
1815 #elif defined(TARGET_TRICORE)
1816 info->value->arch = CPU_INFO_ARCH_TRICORE;
1817 info->value->u.tricore.PC = env->PC;
1818 #else
1819 info->value->arch = CPU_INFO_ARCH_OTHER;
1820 #endif
1822 /* XXX: waiting for the qapi to support GSList */
1823 if (!cur_item) {
1824 head = cur_item = info;
1825 } else {
1826 cur_item->next = info;
1827 cur_item = info;
1831 return head;
1834 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1835 bool has_cpu, int64_t cpu_index, Error **errp)
1837 FILE *f;
1838 uint32_t l;
1839 CPUState *cpu;
1840 uint8_t buf[1024];
1841 int64_t orig_addr = addr, orig_size = size;
1843 if (!has_cpu) {
1844 cpu_index = 0;
1847 cpu = qemu_get_cpu(cpu_index);
1848 if (cpu == NULL) {
1849 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1850 "a CPU number");
1851 return;
1854 f = fopen(filename, "wb");
1855 if (!f) {
1856 error_setg_file_open(errp, errno, filename);
1857 return;
1860 while (size != 0) {
1861 l = sizeof(buf);
1862 if (l > size)
1863 l = size;
1864 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1865 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1866 " specified", orig_addr, orig_size);
1867 goto exit;
1869 if (fwrite(buf, 1, l, f) != l) {
1870 error_setg(errp, QERR_IO_ERROR);
1871 goto exit;
1873 addr += l;
1874 size -= l;
1877 exit:
1878 fclose(f);
1881 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1882 Error **errp)
1884 FILE *f;
1885 uint32_t l;
1886 uint8_t buf[1024];
1888 f = fopen(filename, "wb");
1889 if (!f) {
1890 error_setg_file_open(errp, errno, filename);
1891 return;
1894 while (size != 0) {
1895 l = sizeof(buf);
1896 if (l > size)
1897 l = size;
1898 cpu_physical_memory_read(addr, buf, l);
1899 if (fwrite(buf, 1, l, f) != l) {
1900 error_setg(errp, QERR_IO_ERROR);
1901 goto exit;
1903 addr += l;
1904 size -= l;
1907 exit:
1908 fclose(f);
1911 void qmp_inject_nmi(Error **errp)
1913 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1916 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1918 if (!use_icount) {
1919 return;
1922 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1923 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1924 if (icount_align_option) {
1925 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1926 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1927 } else {
1928 cpu_fprintf(f, "Max guest delay NA\n");
1929 cpu_fprintf(f, "Max guest advance NA\n");