Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
[qemu.git] / cpus.c
blob69e21858b8090bee35fa52ee4ea39efc20929816
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
29 #include "cpu.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "qmp-commands.h"
41 #include "exec/exec-all.h"
43 #include "qemu/thread.h"
44 #include "sysemu/cpus.h"
45 #include "sysemu/qtest.h"
46 #include "qemu/main-loop.h"
47 #include "qemu/bitmap.h"
48 #include "qemu/seqlock.h"
49 #include "tcg.h"
50 #include "qapi-event.h"
51 #include "hw/nmi.h"
52 #include "sysemu/replay.h"
54 #ifdef CONFIG_LINUX
56 #include <sys/prctl.h>
58 #ifndef PR_MCE_KILL
59 #define PR_MCE_KILL 33
60 #endif
62 #ifndef PR_MCE_KILL_SET
63 #define PR_MCE_KILL_SET 1
64 #endif
66 #ifndef PR_MCE_KILL_EARLY
67 #define PR_MCE_KILL_EARLY 1
68 #endif
70 #endif /* CONFIG_LINUX */
72 int64_t max_delay;
73 int64_t max_advance;
75 /* vcpu throttling controls */
76 static QEMUTimer *throttle_timer;
77 static unsigned int throttle_percentage;
79 #define CPU_THROTTLE_PCT_MIN 1
80 #define CPU_THROTTLE_PCT_MAX 99
81 #define CPU_THROTTLE_TIMESLICE_NS 10000000
83 bool cpu_is_stopped(CPUState *cpu)
85 return cpu->stopped || !runstate_is_running();
88 static bool cpu_thread_is_idle(CPUState *cpu)
90 if (cpu->stop || cpu->queued_work_first) {
91 return false;
93 if (cpu_is_stopped(cpu)) {
94 return true;
96 if (!cpu->halted || cpu_has_work(cpu) ||
97 kvm_halt_in_kernel()) {
98 return false;
100 return true;
103 static bool all_cpu_threads_idle(void)
105 CPUState *cpu;
107 CPU_FOREACH(cpu) {
108 if (!cpu_thread_is_idle(cpu)) {
109 return false;
112 return true;
115 /***********************************************************/
116 /* guest cycle counter */
118 /* Protected by TimersState seqlock */
120 static bool icount_sleep = true;
121 static int64_t vm_clock_warp_start = -1;
122 /* Conversion factor from emulated instructions to virtual clock ticks. */
123 static int icount_time_shift;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
125 #define MAX_ICOUNT_SHIFT 10
127 static QEMUTimer *icount_rt_timer;
128 static QEMUTimer *icount_vm_timer;
129 static QEMUTimer *icount_warp_timer;
131 typedef struct TimersState {
132 /* Protected by BQL. */
133 int64_t cpu_ticks_prev;
134 int64_t cpu_ticks_offset;
136 /* cpu_clock_offset can be read out of BQL, so protect it with
137 * this lock.
139 QemuSeqLock vm_clock_seqlock;
140 int64_t cpu_clock_offset;
141 int32_t cpu_ticks_enabled;
142 int64_t dummy;
144 /* Compensate for varying guest execution speed. */
145 int64_t qemu_icount_bias;
146 /* Only written by TCG thread */
147 int64_t qemu_icount;
148 } TimersState;
150 static TimersState timers_state;
151 bool mttcg_enabled;
/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *   - atomic instructions
 *   - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

/* Return true when every ordering bit in the guest's default mask is also
 * set in the host's default mask.  Returns false when either mask macro is
 * undefined. */
static bool check_tcg_memory_orders_compatible(void)
{
#if !defined(TCG_GUEST_DEFAULT_MO) || !defined(TCG_TARGET_DEFAULT_MO)
    return false;
#else
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#endif
}
182 static bool default_mttcg_enabled(void)
184 if (use_icount || TCG_OVERSIZED_GUEST) {
185 return false;
186 } else {
187 #ifdef TARGET_SUPPORTS_MTTCG
188 return check_tcg_memory_orders_compatible();
189 #else
190 return false;
191 #endif
195 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
197 const char *t = qemu_opt_get(opts, "thread");
198 if (t) {
199 if (strcmp(t, "multi") == 0) {
200 if (TCG_OVERSIZED_GUEST) {
201 error_setg(errp, "No MTTCG when guest word size > hosts");
202 } else if (use_icount) {
203 error_setg(errp, "No MTTCG when icount is enabled");
204 } else {
205 #ifndef TARGET_SUPPORT_MTTCG
206 error_report("Guest not yet converted to MTTCG - "
207 "you may get unexpected results");
208 #endif
209 if (!check_tcg_memory_orders_compatible()) {
210 error_report("Guest expects a stronger memory ordering "
211 "than the host provides");
212 error_printf("This may cause strange/hard to debug errors");
214 mttcg_enabled = true;
216 } else if (strcmp(t, "single") == 0) {
217 mttcg_enabled = false;
218 } else {
219 error_setg(errp, "Invalid 'thread' setting %s", t);
221 } else {
222 mttcg_enabled = default_mttcg_enabled();
226 int64_t cpu_get_icount_raw(void)
228 int64_t icount;
229 CPUState *cpu = current_cpu;
231 icount = timers_state.qemu_icount;
232 if (cpu) {
233 if (!cpu->can_do_io) {
234 fprintf(stderr, "Bad icount read\n");
235 exit(1);
237 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
239 return icount;
242 /* Return the virtual CPU time, based on the instruction counter. */
243 static int64_t cpu_get_icount_locked(void)
245 int64_t icount = cpu_get_icount_raw();
246 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
249 int64_t cpu_get_icount(void)
251 int64_t icount;
252 unsigned start;
254 do {
255 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
256 icount = cpu_get_icount_locked();
257 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
259 return icount;
262 int64_t cpu_icount_to_ns(int64_t icount)
264 return icount << icount_time_shift;
267 /* return the time elapsed in VM between vm_start and vm_stop. Unless
268 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
269 * counter.
271 * Caller must hold the BQL
273 int64_t cpu_get_ticks(void)
275 int64_t ticks;
277 if (use_icount) {
278 return cpu_get_icount();
281 ticks = timers_state.cpu_ticks_offset;
282 if (timers_state.cpu_ticks_enabled) {
283 ticks += cpu_get_host_ticks();
286 if (timers_state.cpu_ticks_prev > ticks) {
287 /* Note: non increasing ticks may happen if the host uses
288 software suspend */
289 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
290 ticks = timers_state.cpu_ticks_prev;
293 timers_state.cpu_ticks_prev = ticks;
294 return ticks;
297 static int64_t cpu_get_clock_locked(void)
299 int64_t time;
301 time = timers_state.cpu_clock_offset;
302 if (timers_state.cpu_ticks_enabled) {
303 time += get_clock();
306 return time;
309 /* Return the monotonic time elapsed in VM, i.e.,
310 * the time between vm_start and vm_stop
312 int64_t cpu_get_clock(void)
314 int64_t ti;
315 unsigned start;
317 do {
318 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
319 ti = cpu_get_clock_locked();
320 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
322 return ti;
325 /* enable cpu_get_ticks()
326 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
328 void cpu_enable_ticks(void)
330 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
331 seqlock_write_begin(&timers_state.vm_clock_seqlock);
332 if (!timers_state.cpu_ticks_enabled) {
333 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
334 timers_state.cpu_clock_offset -= get_clock();
335 timers_state.cpu_ticks_enabled = 1;
337 seqlock_write_end(&timers_state.vm_clock_seqlock);
340 /* disable cpu_get_ticks() : the clock is stopped. You must not call
341 * cpu_get_ticks() after that.
342 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
344 void cpu_disable_ticks(void)
346 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
347 seqlock_write_begin(&timers_state.vm_clock_seqlock);
348 if (timers_state.cpu_ticks_enabled) {
349 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
350 timers_state.cpu_clock_offset = cpu_get_clock_locked();
351 timers_state.cpu_ticks_enabled = 0;
353 seqlock_write_end(&timers_state.vm_clock_seqlock);
356 /* Correlation between real and virtual time is always going to be
357 fairly approximate, so ignore small variation.
358 When the guest is idle real and virtual time will be aligned in
359 the IO wait loop. */
360 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
362 static void icount_adjust(void)
364 int64_t cur_time;
365 int64_t cur_icount;
366 int64_t delta;
368 /* Protected by TimersState mutex. */
369 static int64_t last_delta;
371 /* If the VM is not running, then do nothing. */
372 if (!runstate_is_running()) {
373 return;
376 seqlock_write_begin(&timers_state.vm_clock_seqlock);
377 cur_time = cpu_get_clock_locked();
378 cur_icount = cpu_get_icount_locked();
380 delta = cur_icount - cur_time;
381 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
382 if (delta > 0
383 && last_delta + ICOUNT_WOBBLE < delta * 2
384 && icount_time_shift > 0) {
385 /* The guest is getting too far ahead. Slow time down. */
386 icount_time_shift--;
388 if (delta < 0
389 && last_delta - ICOUNT_WOBBLE > delta * 2
390 && icount_time_shift < MAX_ICOUNT_SHIFT) {
391 /* The guest is getting too far behind. Speed time up. */
392 icount_time_shift++;
394 last_delta = delta;
395 timers_state.qemu_icount_bias = cur_icount
396 - (timers_state.qemu_icount << icount_time_shift);
397 seqlock_write_end(&timers_state.vm_clock_seqlock);
400 static void icount_adjust_rt(void *opaque)
402 timer_mod(icount_rt_timer,
403 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
404 icount_adjust();
407 static void icount_adjust_vm(void *opaque)
409 timer_mod(icount_vm_timer,
410 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
411 NANOSECONDS_PER_SECOND / 10);
412 icount_adjust();
415 static int64_t qemu_icount_round(int64_t count)
417 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
420 static void icount_warp_rt(void)
422 unsigned seq;
423 int64_t warp_start;
425 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
426 * changes from -1 to another value, so the race here is okay.
428 do {
429 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
430 warp_start = vm_clock_warp_start;
431 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
433 if (warp_start == -1) {
434 return;
437 seqlock_write_begin(&timers_state.vm_clock_seqlock);
438 if (runstate_is_running()) {
439 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
440 cpu_get_clock_locked());
441 int64_t warp_delta;
443 warp_delta = clock - vm_clock_warp_start;
444 if (use_icount == 2) {
446 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
447 * far ahead of real time.
449 int64_t cur_icount = cpu_get_icount_locked();
450 int64_t delta = clock - cur_icount;
451 warp_delta = MIN(warp_delta, delta);
453 timers_state.qemu_icount_bias += warp_delta;
455 vm_clock_warp_start = -1;
456 seqlock_write_end(&timers_state.vm_clock_seqlock);
458 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
459 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
/* Callback of icount_warp_timer; @opaque is unused. */
static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
471 void qtest_clock_warp(int64_t dest)
473 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
474 AioContext *aio_context;
475 assert(qtest_enabled());
476 aio_context = qemu_get_aio_context();
477 while (clock < dest) {
478 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
479 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
481 seqlock_write_begin(&timers_state.vm_clock_seqlock);
482 timers_state.qemu_icount_bias += warp;
483 seqlock_write_end(&timers_state.vm_clock_seqlock);
485 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
486 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
487 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
489 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
492 void qemu_start_warp_timer(void)
494 int64_t clock;
495 int64_t deadline;
497 if (!use_icount) {
498 return;
501 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
502 * do not fire, so computing the deadline does not make sense.
504 if (!runstate_is_running()) {
505 return;
508 /* warp clock deterministically in record/replay mode */
509 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
510 return;
513 if (!all_cpu_threads_idle()) {
514 return;
517 if (qtest_enabled()) {
518 /* When testing, qtest commands advance icount. */
519 return;
522 /* We want to use the earliest deadline from ALL vm_clocks */
523 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
524 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
525 if (deadline < 0) {
526 static bool notified;
527 if (!icount_sleep && !notified) {
528 error_report("WARNING: icount sleep disabled and no active timers");
529 notified = true;
531 return;
534 if (deadline > 0) {
536 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
537 * sleep. Otherwise, the CPU might be waiting for a future timer
538 * interrupt to wake it up, but the interrupt never comes because
539 * the vCPU isn't running any insns and thus doesn't advance the
540 * QEMU_CLOCK_VIRTUAL.
542 if (!icount_sleep) {
544 * We never let VCPUs sleep in no sleep icount mode.
545 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
546 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
547 * It is useful when we want a deterministic execution time,
548 * isolated from host latencies.
550 seqlock_write_begin(&timers_state.vm_clock_seqlock);
551 timers_state.qemu_icount_bias += deadline;
552 seqlock_write_end(&timers_state.vm_clock_seqlock);
553 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
554 } else {
556 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
557 * "real" time, (related to the time left until the next event) has
558 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
559 * This avoids that the warps are visible externally; for example,
560 * you will not be sending network packets continuously instead of
561 * every 100ms.
563 seqlock_write_begin(&timers_state.vm_clock_seqlock);
564 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
565 vm_clock_warp_start = clock;
567 seqlock_write_end(&timers_state.vm_clock_seqlock);
568 timer_mod_anticipate(icount_warp_timer, clock + deadline);
570 } else if (deadline == 0) {
571 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
575 static void qemu_account_warp_timer(void)
577 if (!use_icount || !icount_sleep) {
578 return;
581 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
582 * do not fire, so computing the deadline does not make sense.
584 if (!runstate_is_running()) {
585 return;
588 /* warp clock deterministically in record/replay mode */
589 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
590 return;
593 timer_del(icount_warp_timer);
594 icount_warp_rt();
597 static bool icount_state_needed(void *opaque)
599 return use_icount;
603 * This is a subsection for icount migration.
605 static const VMStateDescription icount_vmstate_timers = {
606 .name = "timer/icount",
607 .version_id = 1,
608 .minimum_version_id = 1,
609 .needed = icount_state_needed,
610 .fields = (VMStateField[]) {
611 VMSTATE_INT64(qemu_icount_bias, TimersState),
612 VMSTATE_INT64(qemu_icount, TimersState),
613 VMSTATE_END_OF_LIST()
617 static const VMStateDescription vmstate_timers = {
618 .name = "timer",
619 .version_id = 2,
620 .minimum_version_id = 1,
621 .fields = (VMStateField[]) {
622 VMSTATE_INT64(cpu_ticks_offset, TimersState),
623 VMSTATE_INT64(dummy, TimersState),
624 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
625 VMSTATE_END_OF_LIST()
627 .subsections = (const VMStateDescription*[]) {
628 &icount_vmstate_timers,
629 NULL
633 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
635 double pct;
636 double throttle_ratio;
637 long sleeptime_ns;
639 if (!cpu_throttle_get_percentage()) {
640 return;
643 pct = (double)cpu_throttle_get_percentage()/100;
644 throttle_ratio = pct / (1 - pct);
645 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
647 qemu_mutex_unlock_iothread();
648 atomic_set(&cpu->throttle_thread_scheduled, 0);
649 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
650 qemu_mutex_lock_iothread();
653 static void cpu_throttle_timer_tick(void *opaque)
655 CPUState *cpu;
656 double pct;
658 /* Stop the timer if needed */
659 if (!cpu_throttle_get_percentage()) {
660 return;
662 CPU_FOREACH(cpu) {
663 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
664 async_run_on_cpu(cpu, cpu_throttle_thread,
665 RUN_ON_CPU_NULL);
669 pct = (double)cpu_throttle_get_percentage()/100;
670 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
671 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
674 void cpu_throttle_set(int new_throttle_pct)
676 /* Ensure throttle percentage is within valid range */
677 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
678 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
680 atomic_set(&throttle_percentage, new_throttle_pct);
682 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
683 CPU_THROTTLE_TIMESLICE_NS);
686 void cpu_throttle_stop(void)
688 atomic_set(&throttle_percentage, 0);
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
696 int cpu_throttle_get_percentage(void)
698 return atomic_read(&throttle_percentage);
701 void cpu_ticks_init(void)
703 seqlock_init(&timers_state.vm_clock_seqlock);
704 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
705 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
706 cpu_throttle_timer_tick, NULL);
709 void configure_icount(QemuOpts *opts, Error **errp)
711 const char *option;
712 char *rem_str = NULL;
714 option = qemu_opt_get(opts, "shift");
715 if (!option) {
716 if (qemu_opt_get(opts, "align") != NULL) {
717 error_setg(errp, "Please specify shift option when using align");
719 return;
722 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
723 if (icount_sleep) {
724 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
725 icount_timer_cb, NULL);
728 icount_align_option = qemu_opt_get_bool(opts, "align", false);
730 if (icount_align_option && !icount_sleep) {
731 error_setg(errp, "align=on and sleep=off are incompatible");
733 if (strcmp(option, "auto") != 0) {
734 errno = 0;
735 icount_time_shift = strtol(option, &rem_str, 0);
736 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
737 error_setg(errp, "icount: Invalid shift value");
739 use_icount = 1;
740 return;
741 } else if (icount_align_option) {
742 error_setg(errp, "shift=auto and align=on are incompatible");
743 } else if (!icount_sleep) {
744 error_setg(errp, "shift=auto and sleep=off are incompatible");
747 use_icount = 2;
749 /* 125MIPS seems a reasonable initial guess at the guest speed.
750 It will be corrected fairly quickly anyway. */
751 icount_time_shift = 3;
753 /* Have both realtime and virtual time triggers for speed adjustment.
754 The realtime trigger catches emulated time passing too slowly,
755 the virtual time trigger catches emulated time passing too fast.
756 Realtime triggers occur even when idle, so use them less frequently
757 than VM triggers. */
758 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
759 icount_adjust_rt, NULL);
760 timer_mod(icount_rt_timer,
761 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
762 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
763 icount_adjust_vm, NULL);
764 timer_mod(icount_vm_timer,
765 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
766 NANOSECONDS_PER_SECOND / 10);
769 /***********************************************************/
770 /* TCG vCPU kick timer
772 * The kick timer is responsible for moving single threaded vCPU
773 * emulation on to the next vCPU. If more than one vCPU is running a
774 * timer event with force a cpu->exit so the next vCPU can get
775 * scheduled.
777 * The timer is removed if all vCPUs are idle and restarted again once
778 * idleness is complete.
781 static QEMUTimer *tcg_kick_vcpu_timer;
782 static CPUState *tcg_current_rr_cpu;
784 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
786 static inline int64_t qemu_tcg_next_kick(void)
788 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
791 /* Kick the currently round-robin scheduled vCPU */
792 static void qemu_cpu_kick_rr_cpu(void)
794 CPUState *cpu;
795 do {
796 cpu = atomic_mb_read(&tcg_current_rr_cpu);
797 if (cpu) {
798 cpu_exit(cpu);
800 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
803 static void kick_tcg_thread(void *opaque)
805 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
806 qemu_cpu_kick_rr_cpu();
809 static void start_tcg_kick_timer(void)
811 if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
812 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
813 kick_tcg_thread, NULL);
814 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
818 static void stop_tcg_kick_timer(void)
820 if (tcg_kick_vcpu_timer) {
821 timer_del(tcg_kick_vcpu_timer);
822 tcg_kick_vcpu_timer = NULL;
826 /***********************************************************/
827 void hw_error(const char *fmt, ...)
829 va_list ap;
830 CPUState *cpu;
832 va_start(ap, fmt);
833 fprintf(stderr, "qemu: hardware error: ");
834 vfprintf(stderr, fmt, ap);
835 fprintf(stderr, "\n");
836 CPU_FOREACH(cpu) {
837 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
838 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
840 va_end(ap);
841 abort();
844 void cpu_synchronize_all_states(void)
846 CPUState *cpu;
848 CPU_FOREACH(cpu) {
849 cpu_synchronize_state(cpu);
853 void cpu_synchronize_all_post_reset(void)
855 CPUState *cpu;
857 CPU_FOREACH(cpu) {
858 cpu_synchronize_post_reset(cpu);
862 void cpu_synchronize_all_post_init(void)
864 CPUState *cpu;
866 CPU_FOREACH(cpu) {
867 cpu_synchronize_post_init(cpu);
871 static int do_vm_stop(RunState state)
873 int ret = 0;
875 if (runstate_is_running()) {
876 cpu_disable_ticks();
877 pause_all_vcpus();
878 runstate_set(state);
879 vm_state_notify(0, state);
880 qapi_event_send_stop(&error_abort);
883 bdrv_drain_all();
884 replay_disable_events();
885 ret = bdrv_flush_all();
887 return ret;
890 static bool cpu_can_run(CPUState *cpu)
892 if (cpu->stop) {
893 return false;
895 if (cpu_is_stopped(cpu)) {
896 return false;
898 return true;
901 static void cpu_handle_guest_debug(CPUState *cpu)
903 gdb_set_stop_cpu(cpu);
904 qemu_system_debug_request();
905 cpu->stopped = true;
908 #ifdef CONFIG_LINUX
909 static void sigbus_reraise(void)
911 sigset_t set;
912 struct sigaction action;
914 memset(&action, 0, sizeof(action));
915 action.sa_handler = SIG_DFL;
916 if (!sigaction(SIGBUS, &action, NULL)) {
917 raise(SIGBUS);
918 sigemptyset(&set);
919 sigaddset(&set, SIGBUS);
920 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
922 perror("Failed to re-raise SIGBUS!\n");
923 abort();
926 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
928 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
929 sigbus_reraise();
932 if (current_cpu) {
933 /* Called asynchronously in VCPU thread. */
934 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
935 sigbus_reraise();
937 } else {
938 /* Called synchronously (via signalfd) in main thread. */
939 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
940 sigbus_reraise();
945 static void qemu_init_sigbus(void)
947 struct sigaction action;
949 memset(&action, 0, sizeof(action));
950 action.sa_flags = SA_SIGINFO;
951 action.sa_sigaction = sigbus_handler;
952 sigaction(SIGBUS, &action, NULL);
954 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
956 #else /* !CONFIG_LINUX */
/* Non-Linux hosts: no SIGBUS setup is done. */
static void qemu_init_sigbus(void)
{
}
960 #endif /* !CONFIG_LINUX */
962 static QemuMutex qemu_global_mutex;
964 static QemuThread io_thread;
966 /* cpu creation */
967 static QemuCond qemu_cpu_cond;
968 /* system init */
969 static QemuCond qemu_pause_cond;
971 void qemu_init_cpu_loop(void)
973 qemu_init_sigbus();
974 qemu_cond_init(&qemu_cpu_cond);
975 qemu_cond_init(&qemu_pause_cond);
976 qemu_mutex_init(&qemu_global_mutex);
978 qemu_thread_get_self(&io_thread);
981 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
983 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
986 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
988 if (kvm_destroy_vcpu(cpu) < 0) {
989 error_report("kvm_destroy_vcpu failed");
990 exit(EXIT_FAILURE);
994 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
998 static void qemu_wait_io_event_common(CPUState *cpu)
1000 atomic_mb_set(&cpu->thread_kicked, false);
1001 if (cpu->stop) {
1002 cpu->stop = false;
1003 cpu->stopped = true;
1004 qemu_cond_broadcast(&qemu_pause_cond);
1006 process_queued_cpu_work(cpu);
1009 static bool qemu_tcg_should_sleep(CPUState *cpu)
1011 if (mttcg_enabled) {
1012 return cpu_thread_is_idle(cpu);
1013 } else {
1014 return all_cpu_threads_idle();
1018 static void qemu_tcg_wait_io_event(CPUState *cpu)
1020 while (qemu_tcg_should_sleep(cpu)) {
1021 stop_tcg_kick_timer();
1022 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1025 start_tcg_kick_timer();
1027 qemu_wait_io_event_common(cpu);
1030 static void qemu_kvm_wait_io_event(CPUState *cpu)
1032 while (cpu_thread_is_idle(cpu)) {
1033 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1036 qemu_wait_io_event_common(cpu);
1039 static void *qemu_kvm_cpu_thread_fn(void *arg)
1041 CPUState *cpu = arg;
1042 int r;
1044 rcu_register_thread();
1046 qemu_mutex_lock_iothread();
1047 qemu_thread_get_self(cpu->thread);
1048 cpu->thread_id = qemu_get_thread_id();
1049 cpu->can_do_io = 1;
1050 current_cpu = cpu;
1052 r = kvm_init_vcpu(cpu);
1053 if (r < 0) {
1054 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1055 exit(1);
1058 kvm_init_cpu_signals(cpu);
1060 /* signal CPU creation */
1061 cpu->created = true;
1062 qemu_cond_signal(&qemu_cpu_cond);
1064 do {
1065 if (cpu_can_run(cpu)) {
1066 r = kvm_cpu_exec(cpu);
1067 if (r == EXCP_DEBUG) {
1068 cpu_handle_guest_debug(cpu);
1071 qemu_kvm_wait_io_event(cpu);
1072 } while (!cpu->unplug || cpu_can_run(cpu));
1074 qemu_kvm_destroy_vcpu(cpu);
1075 cpu->created = false;
1076 qemu_cond_signal(&qemu_cpu_cond);
1077 qemu_mutex_unlock_iothread();
1078 return NULL;
1081 static void *qemu_dummy_cpu_thread_fn(void *arg)
1083 #ifdef _WIN32
1084 fprintf(stderr, "qtest is not supported under Windows\n");
1085 exit(1);
1086 #else
1087 CPUState *cpu = arg;
1088 sigset_t waitset;
1089 int r;
1091 rcu_register_thread();
1093 qemu_mutex_lock_iothread();
1094 qemu_thread_get_self(cpu->thread);
1095 cpu->thread_id = qemu_get_thread_id();
1096 cpu->can_do_io = 1;
1097 current_cpu = cpu;
1099 sigemptyset(&waitset);
1100 sigaddset(&waitset, SIG_IPI);
1102 /* signal CPU creation */
1103 cpu->created = true;
1104 qemu_cond_signal(&qemu_cpu_cond);
1106 while (1) {
1107 qemu_mutex_unlock_iothread();
1108 do {
1109 int sig;
1110 r = sigwait(&waitset, &sig);
1111 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1112 if (r == -1) {
1113 perror("sigwait");
1114 exit(1);
1116 qemu_mutex_lock_iothread();
1117 qemu_wait_io_event_common(cpu);
1120 return NULL;
1121 #endif
1124 static int64_t tcg_get_icount_limit(void)
1126 int64_t deadline;
1128 if (replay_mode != REPLAY_MODE_PLAY) {
1129 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1131 /* Maintain prior (possibly buggy) behaviour where if no deadline
1132 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1133 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1134 * nanoseconds.
1136 if ((deadline < 0) || (deadline > INT32_MAX)) {
1137 deadline = INT32_MAX;
1140 return qemu_icount_round(deadline);
1141 } else {
1142 return replay_get_instructions();
1146 static void handle_icount_deadline(void)
1148 if (use_icount) {
1149 int64_t deadline =
1150 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1152 if (deadline == 0) {
1153 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1158 static int tcg_cpu_exec(CPUState *cpu)
1160 int ret;
1161 #ifdef CONFIG_PROFILER
1162 int64_t ti;
1163 #endif
1165 #ifdef CONFIG_PROFILER
1166 ti = profile_getclock();
1167 #endif
1168 if (use_icount) {
1169 int64_t count;
1170 int decr;
1171 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1172 + cpu->icount_extra);
1173 cpu->icount_decr.u16.low = 0;
1174 cpu->icount_extra = 0;
1175 count = tcg_get_icount_limit();
1176 timers_state.qemu_icount += count;
1177 decr = (count > 0xffff) ? 0xffff : count;
1178 count -= decr;
1179 cpu->icount_decr.u16.low = decr;
1180 cpu->icount_extra = count;
1182 qemu_mutex_unlock_iothread();
1183 cpu_exec_start(cpu);
1184 ret = cpu_exec(cpu);
1185 cpu_exec_end(cpu);
1186 qemu_mutex_lock_iothread();
1187 #ifdef CONFIG_PROFILER
1188 tcg_time += profile_getclock() - ti;
1189 #endif
1190 if (use_icount) {
1191 /* Fold pending instructions back into the
1192 instruction counter, and clear the interrupt flag. */
1193 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1194 + cpu->icount_extra);
1195 cpu->icount_decr.u32 = 0;
1196 cpu->icount_extra = 0;
1197 replay_account_executed_instructions();
1199 return ret;
1202 /* Destroy any remaining vCPUs which have been unplugged and have
1203 * finished running
1205 static void deal_with_unplugged_cpus(void)
1207 CPUState *cpu;
1209 CPU_FOREACH(cpu) {
1210 if (cpu->unplug && !cpu_can_run(cpu)) {
1211 qemu_tcg_destroy_vcpu(cpu);
1212 cpu->created = false;
1213 qemu_cond_signal(&qemu_cpu_cond);
1214 break;
1219 /* Single-threaded TCG
1221 * In the single-threaded case each vCPU is simulated in turn. If
1222 * there is more than a single vCPU we create a simple timer to kick
1223 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1224 * This is done explicitly rather than relying on side-effects
1225 * elsewhere.
1228 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1230 CPUState *cpu = arg;
1232 rcu_register_thread();
1234 qemu_mutex_lock_iothread();
1235 qemu_thread_get_self(cpu->thread);
1237 CPU_FOREACH(cpu) {
1238 cpu->thread_id = qemu_get_thread_id();
1239 cpu->created = true;
1240 cpu->can_do_io = 1;
1242 qemu_cond_signal(&qemu_cpu_cond);
1244 /* wait for initial kick-off after machine start */
1245 while (first_cpu->stopped) {
1246 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1248 /* process any pending work */
1249 CPU_FOREACH(cpu) {
1250 current_cpu = cpu;
1251 qemu_wait_io_event_common(cpu);
1255 start_tcg_kick_timer();
1257 cpu = first_cpu;
1259 /* process any pending work */
1260 cpu->exit_request = 1;
1262 while (1) {
1263 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1264 qemu_account_warp_timer();
1266 if (!cpu) {
1267 cpu = first_cpu;
1270 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1272 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1273 current_cpu = cpu;
1275 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1276 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1278 if (cpu_can_run(cpu)) {
1279 int r;
1280 r = tcg_cpu_exec(cpu);
1281 if (r == EXCP_DEBUG) {
1282 cpu_handle_guest_debug(cpu);
1283 break;
1284 } else if (r == EXCP_ATOMIC) {
1285 qemu_mutex_unlock_iothread();
1286 cpu_exec_step_atomic(cpu);
1287 qemu_mutex_lock_iothread();
1288 break;
1290 } else if (cpu->stop) {
1291 if (cpu->unplug) {
1292 cpu = CPU_NEXT(cpu);
1294 break;
1297 cpu = CPU_NEXT(cpu);
1298 } /* while (cpu && !cpu->exit_request).. */
1300 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1301 atomic_set(&tcg_current_rr_cpu, NULL);
1303 if (cpu && cpu->exit_request) {
1304 atomic_mb_set(&cpu->exit_request, 0);
1307 handle_icount_deadline();
1309 qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
1310 deal_with_unplugged_cpus();
1313 return NULL;
1316 static void *qemu_hax_cpu_thread_fn(void *arg)
1318 CPUState *cpu = arg;
1319 int r;
1320 qemu_thread_get_self(cpu->thread);
1321 qemu_mutex_lock(&qemu_global_mutex);
1323 cpu->thread_id = qemu_get_thread_id();
1324 cpu->created = true;
1325 cpu->halted = 0;
1326 current_cpu = cpu;
1328 hax_init_vcpu(cpu);
1329 qemu_cond_signal(&qemu_cpu_cond);
1331 while (1) {
1332 if (cpu_can_run(cpu)) {
1333 r = hax_smp_cpu_exec(cpu);
1334 if (r == EXCP_DEBUG) {
1335 cpu_handle_guest_debug(cpu);
1339 while (cpu_thread_is_idle(cpu)) {
1340 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1342 #ifdef _WIN32
1343 SleepEx(0, TRUE);
1344 #endif
1345 qemu_wait_io_event_common(cpu);
1347 return NULL;
#ifdef _WIN32
/*
 * Empty APC routine (Windows only).  It does nothing itself; it is the
 * callback handed to QueueUserAPC() when a vCPU thread is kicked.
 */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
    (void)unused;
}
#endif
1356 /* Multi-threaded TCG
1358 * In the multi-threaded case each vCPU has its own thread. The TLS
1359 * variable current_cpu can be used deep in the code to find the
1360 * current CPUState for a given thread.
1363 static void *qemu_tcg_cpu_thread_fn(void *arg)
1365 CPUState *cpu = arg;
1367 rcu_register_thread();
1369 qemu_mutex_lock_iothread();
1370 qemu_thread_get_self(cpu->thread);
1372 cpu->thread_id = qemu_get_thread_id();
1373 cpu->created = true;
1374 cpu->can_do_io = 1;
1375 current_cpu = cpu;
1376 qemu_cond_signal(&qemu_cpu_cond);
1378 /* process any pending work */
1379 cpu->exit_request = 1;
1381 while (1) {
1382 if (cpu_can_run(cpu)) {
1383 int r;
1384 r = tcg_cpu_exec(cpu);
1385 switch (r) {
1386 case EXCP_DEBUG:
1387 cpu_handle_guest_debug(cpu);
1388 break;
1389 case EXCP_HALTED:
1390 /* during start-up the vCPU is reset and the thread is
1391 * kicked several times. If we don't ensure we go back
1392 * to sleep in the halted state we won't cleanly
1393 * start-up when the vCPU is enabled.
1395 * cpu->halted should ensure we sleep in wait_io_event
1397 g_assert(cpu->halted);
1398 break;
1399 case EXCP_ATOMIC:
1400 qemu_mutex_unlock_iothread();
1401 cpu_exec_step_atomic(cpu);
1402 qemu_mutex_lock_iothread();
1403 default:
1404 /* Ignore everything else? */
1405 break;
1409 handle_icount_deadline();
1411 atomic_mb_set(&cpu->exit_request, 0);
1412 qemu_tcg_wait_io_event(cpu);
1415 return NULL;
1418 static void qemu_cpu_kick_thread(CPUState *cpu)
1420 #ifndef _WIN32
1421 int err;
1423 if (cpu->thread_kicked) {
1424 return;
1426 cpu->thread_kicked = true;
1427 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1428 if (err) {
1429 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1430 exit(1);
1432 #else /* _WIN32 */
1433 if (!qemu_cpu_is_self(cpu)) {
1434 if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1435 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1436 __func__, GetLastError());
1437 exit(1);
1440 #endif
1443 void qemu_cpu_kick(CPUState *cpu)
1445 qemu_cond_broadcast(cpu->halt_cond);
1446 if (tcg_enabled()) {
1447 cpu_exit(cpu);
1448 /* NOP unless doing single-thread RR */
1449 qemu_cpu_kick_rr_cpu();
1450 } else {
1451 if (hax_enabled()) {
1453 * FIXME: race condition with the exit_request check in
1454 * hax_vcpu_hax_exec
1456 cpu->exit_request = 1;
1458 qemu_cpu_kick_thread(cpu);
1462 void qemu_cpu_kick_self(void)
1464 assert(current_cpu);
1465 qemu_cpu_kick_thread(current_cpu);
1468 bool qemu_cpu_is_self(CPUState *cpu)
1470 return qemu_thread_is_self(cpu->thread);
1473 bool qemu_in_vcpu_thread(void)
1475 return current_cpu && qemu_cpu_is_self(current_cpu);
/* Per-thread flag: true while this thread holds the iothread lock. */
static __thread bool iothread_locked = false;

/*
 * Report whether the calling thread currently holds the iothread lock,
 * according to its thread-local flag.
 */
bool qemu_mutex_iothread_locked(void)
{
    bool held = iothread_locked;

    return held;
}
1485 void qemu_mutex_lock_iothread(void)
1487 g_assert(!qemu_mutex_iothread_locked());
1488 qemu_mutex_lock(&qemu_global_mutex);
1489 iothread_locked = true;
1492 void qemu_mutex_unlock_iothread(void)
1494 g_assert(qemu_mutex_iothread_locked());
1495 iothread_locked = false;
1496 qemu_mutex_unlock(&qemu_global_mutex);
1499 static bool all_vcpus_paused(void)
1501 CPUState *cpu;
1503 CPU_FOREACH(cpu) {
1504 if (!cpu->stopped) {
1505 return false;
1509 return true;
1512 void pause_all_vcpus(void)
1514 CPUState *cpu;
1516 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1517 CPU_FOREACH(cpu) {
1518 cpu->stop = true;
1519 qemu_cpu_kick(cpu);
1522 if (qemu_in_vcpu_thread()) {
1523 cpu_stop_current();
1526 while (!all_vcpus_paused()) {
1527 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1528 CPU_FOREACH(cpu) {
1529 qemu_cpu_kick(cpu);
1534 void cpu_resume(CPUState *cpu)
1536 cpu->stop = false;
1537 cpu->stopped = false;
1538 qemu_cpu_kick(cpu);
1541 void resume_all_vcpus(void)
1543 CPUState *cpu;
1545 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1546 CPU_FOREACH(cpu) {
1547 cpu_resume(cpu);
1551 void cpu_remove(CPUState *cpu)
1553 cpu->stop = true;
1554 cpu->unplug = true;
1555 qemu_cpu_kick(cpu);
1558 void cpu_remove_sync(CPUState *cpu)
1560 cpu_remove(cpu);
1561 while (cpu->created) {
1562 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1566 /* For temporary buffers for forming a name */
1567 #define VCPU_THREAD_NAME_SIZE 16
1569 static void qemu_tcg_init_vcpu(CPUState *cpu)
1571 char thread_name[VCPU_THREAD_NAME_SIZE];
1572 static QemuCond *single_tcg_halt_cond;
1573 static QemuThread *single_tcg_cpu_thread;
1575 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1576 cpu->thread = g_malloc0(sizeof(QemuThread));
1577 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1578 qemu_cond_init(cpu->halt_cond);
1580 if (qemu_tcg_mttcg_enabled()) {
1581 /* create a thread per vCPU with TCG (MTTCG) */
1582 parallel_cpus = true;
1583 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1584 cpu->cpu_index);
1586 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1587 cpu, QEMU_THREAD_JOINABLE);
1589 } else {
1590 /* share a single thread for all cpus with TCG */
1591 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1592 qemu_thread_create(cpu->thread, thread_name,
1593 qemu_tcg_rr_cpu_thread_fn,
1594 cpu, QEMU_THREAD_JOINABLE);
1596 single_tcg_halt_cond = cpu->halt_cond;
1597 single_tcg_cpu_thread = cpu->thread;
1599 #ifdef _WIN32
1600 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1601 #endif
1602 while (!cpu->created) {
1603 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1605 } else {
1606 /* For non-MTTCG cases we share the thread */
1607 cpu->thread = single_tcg_cpu_thread;
1608 cpu->halt_cond = single_tcg_halt_cond;
1612 static void qemu_hax_start_vcpu(CPUState *cpu)
1614 char thread_name[VCPU_THREAD_NAME_SIZE];
1616 cpu->thread = g_malloc0(sizeof(QemuThread));
1617 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1618 qemu_cond_init(cpu->halt_cond);
1620 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1621 cpu->cpu_index);
1622 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1623 cpu, QEMU_THREAD_JOINABLE);
1624 #ifdef _WIN32
1625 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1626 #endif
1627 while (!cpu->created) {
1628 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1632 static void qemu_kvm_start_vcpu(CPUState *cpu)
1634 char thread_name[VCPU_THREAD_NAME_SIZE];
1636 cpu->thread = g_malloc0(sizeof(QemuThread));
1637 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1638 qemu_cond_init(cpu->halt_cond);
1639 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1640 cpu->cpu_index);
1641 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1642 cpu, QEMU_THREAD_JOINABLE);
1643 while (!cpu->created) {
1644 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1648 static void qemu_dummy_start_vcpu(CPUState *cpu)
1650 char thread_name[VCPU_THREAD_NAME_SIZE];
1652 cpu->thread = g_malloc0(sizeof(QemuThread));
1653 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1654 qemu_cond_init(cpu->halt_cond);
1655 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1656 cpu->cpu_index);
1657 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1658 QEMU_THREAD_JOINABLE);
1659 while (!cpu->created) {
1660 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1664 void qemu_init_vcpu(CPUState *cpu)
1666 cpu->nr_cores = smp_cores;
1667 cpu->nr_threads = smp_threads;
1668 cpu->stopped = true;
1670 if (!cpu->as) {
1671 /* If the target cpu hasn't set up any address spaces itself,
1672 * give it the default one.
1674 AddressSpace *as = address_space_init_shareable(cpu->memory,
1675 "cpu-memory");
1676 cpu->num_ases = 1;
1677 cpu_address_space_init(cpu, as, 0);
1680 if (kvm_enabled()) {
1681 qemu_kvm_start_vcpu(cpu);
1682 } else if (hax_enabled()) {
1683 qemu_hax_start_vcpu(cpu);
1684 } else if (tcg_enabled()) {
1685 qemu_tcg_init_vcpu(cpu);
1686 } else {
1687 qemu_dummy_start_vcpu(cpu);
1691 void cpu_stop_current(void)
1693 if (current_cpu) {
1694 current_cpu->stop = false;
1695 current_cpu->stopped = true;
1696 cpu_exit(current_cpu);
1697 qemu_cond_broadcast(&qemu_pause_cond);
1701 int vm_stop(RunState state)
1703 if (qemu_in_vcpu_thread()) {
1704 qemu_system_vmstop_request_prepare();
1705 qemu_system_vmstop_request(state);
1707 * FIXME: should not return to device code in case
1708 * vm_stop() has been requested.
1710 cpu_stop_current();
1711 return 0;
1714 return do_vm_stop(state);
1718 * Prepare for (re)starting the VM.
1719 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1720 * running or in case of an error condition), 0 otherwise.
1722 int vm_prepare_start(void)
1724 RunState requested;
1725 int res = 0;
1727 qemu_vmstop_requested(&requested);
1728 if (runstate_is_running() && requested == RUN_STATE__MAX) {
1729 return -1;
1732 /* Ensure that a STOP/RESUME pair of events is emitted if a
1733 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1734 * example, according to documentation is always followed by
1735 * the STOP event.
1737 if (runstate_is_running()) {
1738 qapi_event_send_stop(&error_abort);
1739 res = -1;
1740 } else {
1741 replay_enable_events();
1742 cpu_enable_ticks();
1743 runstate_set(RUN_STATE_RUNNING);
1744 vm_state_notify(1, RUN_STATE_RUNNING);
1747 /* We are sending this now, but the CPUs will be resumed shortly later */
1748 qapi_event_send_resume(&error_abort);
1749 return res;
/*
 * Start (or restart) the VM: the vCPUs are resumed only when
 * vm_prepare_start() returns 0.
 */
void vm_start(void)
{
    if (vm_prepare_start() != 0) {
        return;
    }
    resume_all_vcpus();
}
1759 /* does a state transition even if the VM is already stopped,
1760 current state is forgotten forever */
1761 int vm_stop_force_state(RunState state)
1763 if (runstate_is_running()) {
1764 return vm_stop(state);
1765 } else {
1766 runstate_set(state);
1768 bdrv_drain_all();
1769 /* Make sure to return an error if the flush in a previous vm_stop()
1770 * failed. */
1771 return bdrv_flush_all();
1775 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1777 /* XXX: implement xxx_cpu_list for targets that still miss it */
1778 #if defined(cpu_list)
1779 cpu_list(f, cpu_fprintf);
1780 #endif
1783 CpuInfoList *qmp_query_cpus(Error **errp)
1785 CpuInfoList *head = NULL, *cur_item = NULL;
1786 CPUState *cpu;
1788 CPU_FOREACH(cpu) {
1789 CpuInfoList *info;
1790 #if defined(TARGET_I386)
1791 X86CPU *x86_cpu = X86_CPU(cpu);
1792 CPUX86State *env = &x86_cpu->env;
1793 #elif defined(TARGET_PPC)
1794 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1795 CPUPPCState *env = &ppc_cpu->env;
1796 #elif defined(TARGET_SPARC)
1797 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1798 CPUSPARCState *env = &sparc_cpu->env;
1799 #elif defined(TARGET_MIPS)
1800 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1801 CPUMIPSState *env = &mips_cpu->env;
1802 #elif defined(TARGET_TRICORE)
1803 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1804 CPUTriCoreState *env = &tricore_cpu->env;
1805 #endif
1807 cpu_synchronize_state(cpu);
1809 info = g_malloc0(sizeof(*info));
1810 info->value = g_malloc0(sizeof(*info->value));
1811 info->value->CPU = cpu->cpu_index;
1812 info->value->current = (cpu == first_cpu);
1813 info->value->halted = cpu->halted;
1814 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1815 info->value->thread_id = cpu->thread_id;
1816 #if defined(TARGET_I386)
1817 info->value->arch = CPU_INFO_ARCH_X86;
1818 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1819 #elif defined(TARGET_PPC)
1820 info->value->arch = CPU_INFO_ARCH_PPC;
1821 info->value->u.ppc.nip = env->nip;
1822 #elif defined(TARGET_SPARC)
1823 info->value->arch = CPU_INFO_ARCH_SPARC;
1824 info->value->u.q_sparc.pc = env->pc;
1825 info->value->u.q_sparc.npc = env->npc;
1826 #elif defined(TARGET_MIPS)
1827 info->value->arch = CPU_INFO_ARCH_MIPS;
1828 info->value->u.q_mips.PC = env->active_tc.PC;
1829 #elif defined(TARGET_TRICORE)
1830 info->value->arch = CPU_INFO_ARCH_TRICORE;
1831 info->value->u.tricore.PC = env->PC;
1832 #else
1833 info->value->arch = CPU_INFO_ARCH_OTHER;
1834 #endif
1836 /* XXX: waiting for the qapi to support GSList */
1837 if (!cur_item) {
1838 head = cur_item = info;
1839 } else {
1840 cur_item->next = info;
1841 cur_item = info;
1845 return head;
1848 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1849 bool has_cpu, int64_t cpu_index, Error **errp)
1851 FILE *f;
1852 uint32_t l;
1853 CPUState *cpu;
1854 uint8_t buf[1024];
1855 int64_t orig_addr = addr, orig_size = size;
1857 if (!has_cpu) {
1858 cpu_index = 0;
1861 cpu = qemu_get_cpu(cpu_index);
1862 if (cpu == NULL) {
1863 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1864 "a CPU number");
1865 return;
1868 f = fopen(filename, "wb");
1869 if (!f) {
1870 error_setg_file_open(errp, errno, filename);
1871 return;
1874 while (size != 0) {
1875 l = sizeof(buf);
1876 if (l > size)
1877 l = size;
1878 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1879 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1880 " specified", orig_addr, orig_size);
1881 goto exit;
1883 if (fwrite(buf, 1, l, f) != l) {
1884 error_setg(errp, QERR_IO_ERROR);
1885 goto exit;
1887 addr += l;
1888 size -= l;
1891 exit:
1892 fclose(f);
1895 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1896 Error **errp)
1898 FILE *f;
1899 uint32_t l;
1900 uint8_t buf[1024];
1902 f = fopen(filename, "wb");
1903 if (!f) {
1904 error_setg_file_open(errp, errno, filename);
1905 return;
1908 while (size != 0) {
1909 l = sizeof(buf);
1910 if (l > size)
1911 l = size;
1912 cpu_physical_memory_read(addr, buf, l);
1913 if (fwrite(buf, 1, l, f) != l) {
1914 error_setg(errp, QERR_IO_ERROR);
1915 goto exit;
1917 addr += l;
1918 size -= l;
1921 exit:
1922 fclose(f);
1925 void qmp_inject_nmi(Error **errp)
1927 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1930 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1932 if (!use_icount) {
1933 return;
1936 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1937 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1938 if (icount_align_option) {
1939 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1940 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1941 } else {
1942 cpu_fprintf(f, "Max guest delay NA\n");
1943 cpu_fprintf(f, "Max guest advance NA\n");