virtio-net: only delete bh that existed
[qemu/raspi.git] / cpus.c
blob912938cd1bae5d550ed88372c3fa349e82b1d7cb
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "sysemu/sysemu.h"
30 #include "exec/gdbstub.h"
31 #include "sysemu/dma.h"
32 #include "sysemu/kvm.h"
33 #include "qmp-commands.h"
35 #include "qemu/thread.h"
36 #include "sysemu/cpus.h"
37 #include "sysemu/qtest.h"
38 #include "qemu/main-loop.h"
39 #include "qemu/bitmap.h"
40 #include "qemu/seqlock.h"
42 #ifndef _WIN32
43 #include "qemu/compatfd.h"
44 #endif
46 #ifdef CONFIG_LINUX
48 #include <sys/prctl.h>
50 #ifndef PR_MCE_KILL
51 #define PR_MCE_KILL 33
52 #endif
54 #ifndef PR_MCE_KILL_SET
55 #define PR_MCE_KILL_SET 1
56 #endif
58 #ifndef PR_MCE_KILL_EARLY
59 #define PR_MCE_KILL_EARLY 1
60 #endif
62 #endif /* CONFIG_LINUX */
64 static CPUState *next_cpu;
66 bool cpu_is_stopped(CPUState *cpu)
68 return cpu->stopped || !runstate_is_running();
71 static bool cpu_thread_is_idle(CPUState *cpu)
73 if (cpu->stop || cpu->queued_work_first) {
74 return false;
76 if (cpu_is_stopped(cpu)) {
77 return true;
79 if (!cpu->halted || qemu_cpu_has_work(cpu) ||
80 kvm_halt_in_kernel()) {
81 return false;
83 return true;
86 static bool all_cpu_threads_idle(void)
88 CPUState *cpu;
90 CPU_FOREACH(cpu) {
91 if (!cpu_thread_is_idle(cpu)) {
92 return false;
95 return true;
98 /***********************************************************/
99 /* guest cycle counter */
101 /* Protected by TimersState seqlock */
103 /* Compensate for varying guest execution speed. */
104 static int64_t qemu_icount_bias;
105 static int64_t vm_clock_warp_start;
106 /* Conversion factor from emulated instructions to virtual clock ticks. */
107 static int icount_time_shift;
108 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
109 #define MAX_ICOUNT_SHIFT 10
111 /* Only written by TCG thread */
112 static int64_t qemu_icount;
114 static QEMUTimer *icount_rt_timer;
115 static QEMUTimer *icount_vm_timer;
116 static QEMUTimer *icount_warp_timer;
118 typedef struct TimersState {
119 /* Protected by BQL. */
120 int64_t cpu_ticks_prev;
121 int64_t cpu_ticks_offset;
123 /* cpu_clock_offset can be read out of BQL, so protect it with
124 * this lock.
126 QemuSeqLock vm_clock_seqlock;
127 int64_t cpu_clock_offset;
128 int32_t cpu_ticks_enabled;
129 int64_t dummy;
130 } TimersState;
132 static TimersState timers_state;
134 /* Return the virtual CPU time, based on the instruction counter. */
135 static int64_t cpu_get_icount_locked(void)
137 int64_t icount;
138 CPUState *cpu = current_cpu;
140 icount = qemu_icount;
141 if (cpu) {
142 CPUArchState *env = cpu->env_ptr;
143 if (!can_do_io(env)) {
144 fprintf(stderr, "Bad clock read\n");
146 icount -= (env->icount_decr.u16.low + env->icount_extra);
148 return qemu_icount_bias + (icount << icount_time_shift);
151 int64_t cpu_get_icount(void)
153 int64_t icount;
154 unsigned start;
156 do {
157 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
158 icount = cpu_get_icount_locked();
159 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
161 return icount;
164 /* return the host CPU cycle counter and handle stop/restart */
165 /* Caller must hold the BQL */
166 int64_t cpu_get_ticks(void)
168 if (use_icount) {
169 return cpu_get_icount();
171 if (!timers_state.cpu_ticks_enabled) {
172 return timers_state.cpu_ticks_offset;
173 } else {
174 int64_t ticks;
175 ticks = cpu_get_real_ticks();
176 if (timers_state.cpu_ticks_prev > ticks) {
177 /* Note: non increasing ticks may happen if the host uses
178 software suspend */
179 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
181 timers_state.cpu_ticks_prev = ticks;
182 return ticks + timers_state.cpu_ticks_offset;
186 static int64_t cpu_get_clock_locked(void)
188 int64_t ti;
190 if (!timers_state.cpu_ticks_enabled) {
191 ti = timers_state.cpu_clock_offset;
192 } else {
193 ti = get_clock();
194 ti += timers_state.cpu_clock_offset;
197 return ti;
200 /* return the host CPU monotonic timer and handle stop/restart */
201 int64_t cpu_get_clock(void)
203 int64_t ti;
204 unsigned start;
206 do {
207 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
208 ti = cpu_get_clock_locked();
209 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
211 return ti;
214 /* enable cpu_get_ticks()
215 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
217 void cpu_enable_ticks(void)
219 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
220 seqlock_write_lock(&timers_state.vm_clock_seqlock);
221 if (!timers_state.cpu_ticks_enabled) {
222 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
223 timers_state.cpu_clock_offset -= get_clock();
224 timers_state.cpu_ticks_enabled = 1;
226 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
229 /* disable cpu_get_ticks() : the clock is stopped. You must not call
230 * cpu_get_ticks() after that.
231 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
233 void cpu_disable_ticks(void)
235 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
236 seqlock_write_lock(&timers_state.vm_clock_seqlock);
237 if (timers_state.cpu_ticks_enabled) {
238 timers_state.cpu_ticks_offset = cpu_get_ticks();
239 timers_state.cpu_clock_offset = cpu_get_clock_locked();
240 timers_state.cpu_ticks_enabled = 0;
242 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
245 /* Correlation between real and virtual time is always going to be
246 fairly approximate, so ignore small variation.
247 When the guest is idle real and virtual time will be aligned in
248 the IO wait loop. */
249 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
251 static void icount_adjust(void)
253 int64_t cur_time;
254 int64_t cur_icount;
255 int64_t delta;
257 /* Protected by TimersState mutex. */
258 static int64_t last_delta;
260 /* If the VM is not running, then do nothing. */
261 if (!runstate_is_running()) {
262 return;
265 seqlock_write_lock(&timers_state.vm_clock_seqlock);
266 cur_time = cpu_get_clock_locked();
267 cur_icount = cpu_get_icount_locked();
269 delta = cur_icount - cur_time;
270 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
271 if (delta > 0
272 && last_delta + ICOUNT_WOBBLE < delta * 2
273 && icount_time_shift > 0) {
274 /* The guest is getting too far ahead. Slow time down. */
275 icount_time_shift--;
277 if (delta < 0
278 && last_delta - ICOUNT_WOBBLE > delta * 2
279 && icount_time_shift < MAX_ICOUNT_SHIFT) {
280 /* The guest is getting too far behind. Speed time up. */
281 icount_time_shift++;
283 last_delta = delta;
284 qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
285 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
288 static void icount_adjust_rt(void *opaque)
290 timer_mod(icount_rt_timer,
291 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
292 icount_adjust();
295 static void icount_adjust_vm(void *opaque)
297 timer_mod(icount_vm_timer,
298 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
299 get_ticks_per_sec() / 10);
300 icount_adjust();
303 static int64_t qemu_icount_round(int64_t count)
305 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
308 static void icount_warp_rt(void *opaque)
310 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
311 * changes from -1 to another value, so the race here is okay.
313 if (atomic_read(&vm_clock_warp_start) == -1) {
314 return;
317 seqlock_write_lock(&timers_state.vm_clock_seqlock);
318 if (runstate_is_running()) {
319 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
320 int64_t warp_delta;
322 warp_delta = clock - vm_clock_warp_start;
323 if (use_icount == 2) {
325 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
326 * far ahead of real time.
328 int64_t cur_time = cpu_get_clock_locked();
329 int64_t cur_icount = cpu_get_icount_locked();
330 int64_t delta = cur_time - cur_icount;
331 warp_delta = MIN(warp_delta, delta);
333 qemu_icount_bias += warp_delta;
335 vm_clock_warp_start = -1;
336 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
338 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
339 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
343 void qtest_clock_warp(int64_t dest)
345 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
346 assert(qtest_enabled());
347 while (clock < dest) {
348 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
349 int64_t warp = MIN(dest - clock, deadline);
350 seqlock_write_lock(&timers_state.vm_clock_seqlock);
351 qemu_icount_bias += warp;
352 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
354 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
355 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
357 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
360 void qemu_clock_warp(QEMUClockType type)
362 int64_t clock;
363 int64_t deadline;
366 * There are too many global variables to make the "warp" behavior
367 * applicable to other clocks. But a clock argument removes the
368 * need for if statements all over the place.
370 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
371 return;
375 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
376 * This ensures that the deadline for the timer is computed correctly below.
377 * This also makes sure that the insn counter is synchronized before the
378 * CPU starts running, in case the CPU is woken by an event other than
379 * the earliest QEMU_CLOCK_VIRTUAL timer.
381 icount_warp_rt(NULL);
382 timer_del(icount_warp_timer);
383 if (!all_cpu_threads_idle()) {
384 return;
387 if (qtest_enabled()) {
388 /* When testing, qtest commands advance icount. */
389 return;
392 /* We want to use the earliest deadline from ALL vm_clocks */
393 clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
394 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
395 if (deadline < 0) {
396 return;
399 if (deadline > 0) {
401 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
402 * sleep. Otherwise, the CPU might be waiting for a future timer
403 * interrupt to wake it up, but the interrupt never comes because
404 * the vCPU isn't running any insns and thus doesn't advance the
405 * QEMU_CLOCK_VIRTUAL.
407 * An extreme solution for this problem would be to never let VCPUs
408 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
409 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
410 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
411 * after some e"real" time, (related to the time left until the next
412 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
413 * This avoids that the warps are visible externally; for example,
414 * you will not be sending network packets continuously instead of
415 * every 100ms.
417 seqlock_write_lock(&timers_state.vm_clock_seqlock);
418 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
419 vm_clock_warp_start = clock;
421 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
422 timer_mod_anticipate(icount_warp_timer, clock + deadline);
423 } else if (deadline == 0) {
424 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
428 static const VMStateDescription vmstate_timers = {
429 .name = "timer",
430 .version_id = 2,
431 .minimum_version_id = 1,
432 .minimum_version_id_old = 1,
433 .fields = (VMStateField[]) {
434 VMSTATE_INT64(cpu_ticks_offset, TimersState),
435 VMSTATE_INT64(dummy, TimersState),
436 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
437 VMSTATE_END_OF_LIST()
441 void configure_icount(const char *option)
443 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
444 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
445 if (!option) {
446 return;
449 icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
450 icount_warp_rt, NULL);
451 if (strcmp(option, "auto") != 0) {
452 icount_time_shift = strtol(option, NULL, 0);
453 use_icount = 1;
454 return;
457 use_icount = 2;
459 /* 125MIPS seems a reasonable initial guess at the guest speed.
460 It will be corrected fairly quickly anyway. */
461 icount_time_shift = 3;
463 /* Have both realtime and virtual time triggers for speed adjustment.
464 The realtime trigger catches emulated time passing too slowly,
465 the virtual time trigger catches emulated time passing too fast.
466 Realtime triggers occur even when idle, so use them less frequently
467 than VM triggers. */
468 icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
469 icount_adjust_rt, NULL);
470 timer_mod(icount_rt_timer,
471 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
472 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
473 icount_adjust_vm, NULL);
474 timer_mod(icount_vm_timer,
475 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
476 get_ticks_per_sec() / 10);
479 /***********************************************************/
480 void hw_error(const char *fmt, ...)
482 va_list ap;
483 CPUState *cpu;
485 va_start(ap, fmt);
486 fprintf(stderr, "qemu: hardware error: ");
487 vfprintf(stderr, fmt, ap);
488 fprintf(stderr, "\n");
489 CPU_FOREACH(cpu) {
490 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
491 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
493 va_end(ap);
494 abort();
497 void cpu_synchronize_all_states(void)
499 CPUState *cpu;
501 CPU_FOREACH(cpu) {
502 cpu_synchronize_state(cpu);
506 void cpu_synchronize_all_post_reset(void)
508 CPUState *cpu;
510 CPU_FOREACH(cpu) {
511 cpu_synchronize_post_reset(cpu);
515 void cpu_synchronize_all_post_init(void)
517 CPUState *cpu;
519 CPU_FOREACH(cpu) {
520 cpu_synchronize_post_init(cpu);
524 static int do_vm_stop(RunState state)
526 int ret = 0;
528 if (runstate_is_running()) {
529 cpu_disable_ticks();
530 pause_all_vcpus();
531 runstate_set(state);
532 vm_state_notify(0, state);
533 monitor_protocol_event(QEVENT_STOP, NULL);
536 bdrv_drain_all();
537 ret = bdrv_flush_all();
539 return ret;
542 static bool cpu_can_run(CPUState *cpu)
544 if (cpu->stop) {
545 return false;
547 if (cpu_is_stopped(cpu)) {
548 return false;
550 return true;
553 static void cpu_handle_guest_debug(CPUState *cpu)
555 gdb_set_stop_cpu(cpu);
556 qemu_system_debug_request();
557 cpu->stopped = true;
560 static void cpu_signal(int sig)
562 if (current_cpu) {
563 cpu_exit(current_cpu);
565 exit_request = 1;
568 #ifdef CONFIG_LINUX
569 static void sigbus_reraise(void)
571 sigset_t set;
572 struct sigaction action;
574 memset(&action, 0, sizeof(action));
575 action.sa_handler = SIG_DFL;
576 if (!sigaction(SIGBUS, &action, NULL)) {
577 raise(SIGBUS);
578 sigemptyset(&set);
579 sigaddset(&set, SIGBUS);
580 sigprocmask(SIG_UNBLOCK, &set, NULL);
582 perror("Failed to re-raise SIGBUS!\n");
583 abort();
586 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
587 void *ctx)
589 if (kvm_on_sigbus(siginfo->ssi_code,
590 (void *)(intptr_t)siginfo->ssi_addr)) {
591 sigbus_reraise();
595 static void qemu_init_sigbus(void)
597 struct sigaction action;
599 memset(&action, 0, sizeof(action));
600 action.sa_flags = SA_SIGINFO;
601 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
602 sigaction(SIGBUS, &action, NULL);
604 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
607 static void qemu_kvm_eat_signals(CPUState *cpu)
609 struct timespec ts = { 0, 0 };
610 siginfo_t siginfo;
611 sigset_t waitset;
612 sigset_t chkset;
613 int r;
615 sigemptyset(&waitset);
616 sigaddset(&waitset, SIG_IPI);
617 sigaddset(&waitset, SIGBUS);
619 do {
620 r = sigtimedwait(&waitset, &siginfo, &ts);
621 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
622 perror("sigtimedwait");
623 exit(1);
626 switch (r) {
627 case SIGBUS:
628 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
629 sigbus_reraise();
631 break;
632 default:
633 break;
636 r = sigpending(&chkset);
637 if (r == -1) {
638 perror("sigpending");
639 exit(1);
641 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
644 #else /* !CONFIG_LINUX */
/* Non-Linux build: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
}
650 static void qemu_kvm_eat_signals(CPUState *cpu)
653 #endif /* !CONFIG_LINUX */
655 #ifndef _WIN32
/* Signal handler that deliberately does nothing. @sig is unused. */
static void dummy_signal(int sig)
{
}
660 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
662 int r;
663 sigset_t set;
664 struct sigaction sigact;
666 memset(&sigact, 0, sizeof(sigact));
667 sigact.sa_handler = dummy_signal;
668 sigaction(SIG_IPI, &sigact, NULL);
670 pthread_sigmask(SIG_BLOCK, NULL, &set);
671 sigdelset(&set, SIG_IPI);
672 sigdelset(&set, SIGBUS);
673 r = kvm_set_signal_mask(cpu, &set);
674 if (r) {
675 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
676 exit(1);
680 static void qemu_tcg_init_cpu_signals(void)
682 sigset_t set;
683 struct sigaction sigact;
685 memset(&sigact, 0, sizeof(sigact));
686 sigact.sa_handler = cpu_signal;
687 sigaction(SIG_IPI, &sigact, NULL);
689 sigemptyset(&set);
690 sigaddset(&set, SIG_IPI);
691 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
694 #else /* _WIN32 */
695 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
697 abort();
/* Win32 build: no signal setup is done for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
}
703 #endif /* _WIN32 */
705 static QemuMutex qemu_global_mutex;
706 static QemuCond qemu_io_proceeded_cond;
707 static bool iothread_requesting_mutex;
709 static QemuThread io_thread;
711 static QemuThread *tcg_cpu_thread;
712 static QemuCond *tcg_halt_cond;
714 /* cpu creation */
715 static QemuCond qemu_cpu_cond;
716 /* system init */
717 static QemuCond qemu_pause_cond;
718 static QemuCond qemu_work_cond;
720 void qemu_init_cpu_loop(void)
722 qemu_init_sigbus();
723 qemu_cond_init(&qemu_cpu_cond);
724 qemu_cond_init(&qemu_pause_cond);
725 qemu_cond_init(&qemu_work_cond);
726 qemu_cond_init(&qemu_io_proceeded_cond);
727 qemu_mutex_init(&qemu_global_mutex);
729 qemu_thread_get_self(&io_thread);
732 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
734 struct qemu_work_item wi;
736 if (qemu_cpu_is_self(cpu)) {
737 func(data);
738 return;
741 wi.func = func;
742 wi.data = data;
743 wi.free = false;
744 if (cpu->queued_work_first == NULL) {
745 cpu->queued_work_first = &wi;
746 } else {
747 cpu->queued_work_last->next = &wi;
749 cpu->queued_work_last = &wi;
750 wi.next = NULL;
751 wi.done = false;
753 qemu_cpu_kick(cpu);
754 while (!wi.done) {
755 CPUState *self_cpu = current_cpu;
757 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
758 current_cpu = self_cpu;
762 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
764 struct qemu_work_item *wi;
766 if (qemu_cpu_is_self(cpu)) {
767 func(data);
768 return;
771 wi = g_malloc0(sizeof(struct qemu_work_item));
772 wi->func = func;
773 wi->data = data;
774 wi->free = true;
775 if (cpu->queued_work_first == NULL) {
776 cpu->queued_work_first = wi;
777 } else {
778 cpu->queued_work_last->next = wi;
780 cpu->queued_work_last = wi;
781 wi->next = NULL;
782 wi->done = false;
784 qemu_cpu_kick(cpu);
787 static void flush_queued_work(CPUState *cpu)
789 struct qemu_work_item *wi;
791 if (cpu->queued_work_first == NULL) {
792 return;
795 while ((wi = cpu->queued_work_first)) {
796 cpu->queued_work_first = wi->next;
797 wi->func(wi->data);
798 wi->done = true;
799 if (wi->free) {
800 g_free(wi);
803 cpu->queued_work_last = NULL;
804 qemu_cond_broadcast(&qemu_work_cond);
807 static void qemu_wait_io_event_common(CPUState *cpu)
809 if (cpu->stop) {
810 cpu->stop = false;
811 cpu->stopped = true;
812 qemu_cond_signal(&qemu_pause_cond);
814 flush_queued_work(cpu);
815 cpu->thread_kicked = false;
818 static void qemu_tcg_wait_io_event(void)
820 CPUState *cpu;
822 while (all_cpu_threads_idle()) {
823 /* Start accounting real time to the virtual clock if the CPUs
824 are idle. */
825 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
826 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
829 while (iothread_requesting_mutex) {
830 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
833 CPU_FOREACH(cpu) {
834 qemu_wait_io_event_common(cpu);
838 static void qemu_kvm_wait_io_event(CPUState *cpu)
840 while (cpu_thread_is_idle(cpu)) {
841 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
844 qemu_kvm_eat_signals(cpu);
845 qemu_wait_io_event_common(cpu);
848 static void *qemu_kvm_cpu_thread_fn(void *arg)
850 CPUState *cpu = arg;
851 int r;
853 qemu_mutex_lock(&qemu_global_mutex);
854 qemu_thread_get_self(cpu->thread);
855 cpu->thread_id = qemu_get_thread_id();
856 current_cpu = cpu;
858 r = kvm_init_vcpu(cpu);
859 if (r < 0) {
860 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
861 exit(1);
864 qemu_kvm_init_cpu_signals(cpu);
866 /* signal CPU creation */
867 cpu->created = true;
868 qemu_cond_signal(&qemu_cpu_cond);
870 while (1) {
871 if (cpu_can_run(cpu)) {
872 r = kvm_cpu_exec(cpu);
873 if (r == EXCP_DEBUG) {
874 cpu_handle_guest_debug(cpu);
877 qemu_kvm_wait_io_event(cpu);
880 return NULL;
883 static void *qemu_dummy_cpu_thread_fn(void *arg)
885 #ifdef _WIN32
886 fprintf(stderr, "qtest is not supported under Windows\n");
887 exit(1);
888 #else
889 CPUState *cpu = arg;
890 sigset_t waitset;
891 int r;
893 qemu_mutex_lock_iothread();
894 qemu_thread_get_self(cpu->thread);
895 cpu->thread_id = qemu_get_thread_id();
897 sigemptyset(&waitset);
898 sigaddset(&waitset, SIG_IPI);
900 /* signal CPU creation */
901 cpu->created = true;
902 qemu_cond_signal(&qemu_cpu_cond);
904 current_cpu = cpu;
905 while (1) {
906 current_cpu = NULL;
907 qemu_mutex_unlock_iothread();
908 do {
909 int sig;
910 r = sigwait(&waitset, &sig);
911 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
912 if (r == -1) {
913 perror("sigwait");
914 exit(1);
916 qemu_mutex_lock_iothread();
917 current_cpu = cpu;
918 qemu_wait_io_event_common(cpu);
921 return NULL;
922 #endif
925 static void tcg_exec_all(void);
927 static void *qemu_tcg_cpu_thread_fn(void *arg)
929 CPUState *cpu = arg;
931 qemu_tcg_init_cpu_signals();
932 qemu_thread_get_self(cpu->thread);
934 qemu_mutex_lock(&qemu_global_mutex);
935 CPU_FOREACH(cpu) {
936 cpu->thread_id = qemu_get_thread_id();
937 cpu->created = true;
939 qemu_cond_signal(&qemu_cpu_cond);
941 /* wait for initial kick-off after machine start */
942 while (QTAILQ_FIRST(&cpus)->stopped) {
943 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
945 /* process any pending work */
946 CPU_FOREACH(cpu) {
947 qemu_wait_io_event_common(cpu);
951 while (1) {
952 tcg_exec_all();
954 if (use_icount) {
955 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
957 if (deadline == 0) {
958 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
961 qemu_tcg_wait_io_event();
964 return NULL;
967 static void qemu_cpu_kick_thread(CPUState *cpu)
969 #ifndef _WIN32
970 int err;
972 err = pthread_kill(cpu->thread->thread, SIG_IPI);
973 if (err) {
974 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
975 exit(1);
977 #else /* _WIN32 */
978 if (!qemu_cpu_is_self(cpu)) {
979 CONTEXT tcgContext;
981 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
982 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
983 GetLastError());
984 exit(1);
987 /* On multi-core systems, we are not sure that the thread is actually
988 * suspended until we can get the context.
990 tcgContext.ContextFlags = CONTEXT_CONTROL;
991 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
992 continue;
995 cpu_signal(0);
997 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
998 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
999 GetLastError());
1000 exit(1);
1003 #endif
1006 void qemu_cpu_kick(CPUState *cpu)
1008 qemu_cond_broadcast(cpu->halt_cond);
1009 if (!tcg_enabled() && !cpu->thread_kicked) {
1010 qemu_cpu_kick_thread(cpu);
1011 cpu->thread_kicked = true;
1015 void qemu_cpu_kick_self(void)
1017 #ifndef _WIN32
1018 assert(current_cpu);
1020 if (!current_cpu->thread_kicked) {
1021 qemu_cpu_kick_thread(current_cpu);
1022 current_cpu->thread_kicked = true;
1024 #else
1025 abort();
1026 #endif
1029 bool qemu_cpu_is_self(CPUState *cpu)
1031 return qemu_thread_is_self(cpu->thread);
1034 static bool qemu_in_vcpu_thread(void)
1036 return current_cpu && qemu_cpu_is_self(current_cpu);
1039 void qemu_mutex_lock_iothread(void)
1041 if (!tcg_enabled()) {
1042 qemu_mutex_lock(&qemu_global_mutex);
1043 } else {
1044 iothread_requesting_mutex = true;
1045 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1046 qemu_cpu_kick_thread(first_cpu);
1047 qemu_mutex_lock(&qemu_global_mutex);
1049 iothread_requesting_mutex = false;
1050 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1054 void qemu_mutex_unlock_iothread(void)
1056 qemu_mutex_unlock(&qemu_global_mutex);
1059 static int all_vcpus_paused(void)
1061 CPUState *cpu;
1063 CPU_FOREACH(cpu) {
1064 if (!cpu->stopped) {
1065 return 0;
1069 return 1;
1072 void pause_all_vcpus(void)
1074 CPUState *cpu;
1076 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1077 CPU_FOREACH(cpu) {
1078 cpu->stop = true;
1079 qemu_cpu_kick(cpu);
1082 if (qemu_in_vcpu_thread()) {
1083 cpu_stop_current();
1084 if (!kvm_enabled()) {
1085 CPU_FOREACH(cpu) {
1086 cpu->stop = false;
1087 cpu->stopped = true;
1089 return;
1093 while (!all_vcpus_paused()) {
1094 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1095 CPU_FOREACH(cpu) {
1096 qemu_cpu_kick(cpu);
1101 void cpu_resume(CPUState *cpu)
1103 cpu->stop = false;
1104 cpu->stopped = false;
1105 qemu_cpu_kick(cpu);
1108 void resume_all_vcpus(void)
1110 CPUState *cpu;
1112 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1113 CPU_FOREACH(cpu) {
1114 cpu_resume(cpu);
1118 static void qemu_tcg_init_vcpu(CPUState *cpu)
1120 /* share a single thread for all cpus with TCG */
1121 if (!tcg_cpu_thread) {
1122 cpu->thread = g_malloc0(sizeof(QemuThread));
1123 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1124 qemu_cond_init(cpu->halt_cond);
1125 tcg_halt_cond = cpu->halt_cond;
1126 qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
1127 QEMU_THREAD_JOINABLE);
1128 #ifdef _WIN32
1129 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1130 #endif
1131 while (!cpu->created) {
1132 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1134 tcg_cpu_thread = cpu->thread;
1135 } else {
1136 cpu->thread = tcg_cpu_thread;
1137 cpu->halt_cond = tcg_halt_cond;
1141 static void qemu_kvm_start_vcpu(CPUState *cpu)
1143 cpu->thread = g_malloc0(sizeof(QemuThread));
1144 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1145 qemu_cond_init(cpu->halt_cond);
1146 qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, cpu,
1147 QEMU_THREAD_JOINABLE);
1148 while (!cpu->created) {
1149 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1153 static void qemu_dummy_start_vcpu(CPUState *cpu)
1155 cpu->thread = g_malloc0(sizeof(QemuThread));
1156 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1157 qemu_cond_init(cpu->halt_cond);
1158 qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, cpu,
1159 QEMU_THREAD_JOINABLE);
1160 while (!cpu->created) {
1161 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1165 void qemu_init_vcpu(CPUState *cpu)
1167 cpu->nr_cores = smp_cores;
1168 cpu->nr_threads = smp_threads;
1169 cpu->stopped = true;
1170 if (kvm_enabled()) {
1171 qemu_kvm_start_vcpu(cpu);
1172 } else if (tcg_enabled()) {
1173 qemu_tcg_init_vcpu(cpu);
1174 } else {
1175 qemu_dummy_start_vcpu(cpu);
1179 void cpu_stop_current(void)
1181 if (current_cpu) {
1182 current_cpu->stop = false;
1183 current_cpu->stopped = true;
1184 cpu_exit(current_cpu);
1185 qemu_cond_signal(&qemu_pause_cond);
1189 int vm_stop(RunState state)
1191 if (qemu_in_vcpu_thread()) {
1192 qemu_system_vmstop_request(state);
1194 * FIXME: should not return to device code in case
1195 * vm_stop() has been requested.
1197 cpu_stop_current();
1198 return 0;
1201 return do_vm_stop(state);
1204 /* does a state transition even if the VM is already stopped,
1205 current state is forgotten forever */
1206 int vm_stop_force_state(RunState state)
1208 if (runstate_is_running()) {
1209 return vm_stop(state);
1210 } else {
1211 runstate_set(state);
1212 /* Make sure to return an error if the flush in a previous vm_stop()
1213 * failed. */
1214 return bdrv_flush_all();
1218 static int tcg_cpu_exec(CPUArchState *env)
1220 int ret;
1221 #ifdef CONFIG_PROFILER
1222 int64_t ti;
1223 #endif
1225 #ifdef CONFIG_PROFILER
1226 ti = profile_getclock();
1227 #endif
1228 if (use_icount) {
1229 int64_t count;
1230 int64_t deadline;
1231 int decr;
1232 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
1233 env->icount_decr.u16.low = 0;
1234 env->icount_extra = 0;
1235 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1237 /* Maintain prior (possibly buggy) behaviour where if no deadline
1238 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1239 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1240 * nanoseconds.
1242 if ((deadline < 0) || (deadline > INT32_MAX)) {
1243 deadline = INT32_MAX;
1246 count = qemu_icount_round(deadline);
1247 qemu_icount += count;
1248 decr = (count > 0xffff) ? 0xffff : count;
1249 count -= decr;
1250 env->icount_decr.u16.low = decr;
1251 env->icount_extra = count;
1253 ret = cpu_exec(env);
1254 #ifdef CONFIG_PROFILER
1255 qemu_time += profile_getclock() - ti;
1256 #endif
1257 if (use_icount) {
1258 /* Fold pending instructions back into the
1259 instruction counter, and clear the interrupt flag. */
1260 qemu_icount -= (env->icount_decr.u16.low
1261 + env->icount_extra);
1262 env->icount_decr.u32 = 0;
1263 env->icount_extra = 0;
1265 return ret;
1268 static void tcg_exec_all(void)
1270 int r;
1272 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1273 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
1275 if (next_cpu == NULL) {
1276 next_cpu = first_cpu;
1278 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1279 CPUState *cpu = next_cpu;
1280 CPUArchState *env = cpu->env_ptr;
1282 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1283 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1285 if (cpu_can_run(cpu)) {
1286 r = tcg_cpu_exec(env);
1287 if (r == EXCP_DEBUG) {
1288 cpu_handle_guest_debug(cpu);
1289 break;
1291 } else if (cpu->stop || cpu->stopped) {
1292 break;
1295 exit_request = 0;
1298 void set_numa_modes(void)
1300 CPUState *cpu;
1301 int i;
1303 CPU_FOREACH(cpu) {
1304 for (i = 0; i < nb_numa_nodes; i++) {
1305 if (test_bit(cpu->cpu_index, node_cpumask[i])) {
1306 cpu->numa_node = i;
1312 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1314 /* XXX: implement xxx_cpu_list for targets that still miss it */
1315 #if defined(cpu_list)
1316 cpu_list(f, cpu_fprintf);
1317 #endif
1320 CpuInfoList *qmp_query_cpus(Error **errp)
1322 CpuInfoList *head = NULL, *cur_item = NULL;
1323 CPUState *cpu;
1325 CPU_FOREACH(cpu) {
1326 CpuInfoList *info;
1327 #if defined(TARGET_I386)
1328 X86CPU *x86_cpu = X86_CPU(cpu);
1329 CPUX86State *env = &x86_cpu->env;
1330 #elif defined(TARGET_PPC)
1331 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1332 CPUPPCState *env = &ppc_cpu->env;
1333 #elif defined(TARGET_SPARC)
1334 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1335 CPUSPARCState *env = &sparc_cpu->env;
1336 #elif defined(TARGET_MIPS)
1337 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1338 CPUMIPSState *env = &mips_cpu->env;
1339 #endif
1341 cpu_synchronize_state(cpu);
1343 info = g_malloc0(sizeof(*info));
1344 info->value = g_malloc0(sizeof(*info->value));
1345 info->value->CPU = cpu->cpu_index;
1346 info->value->current = (cpu == first_cpu);
1347 info->value->halted = cpu->halted;
1348 info->value->thread_id = cpu->thread_id;
1349 #if defined(TARGET_I386)
1350 info->value->has_pc = true;
1351 info->value->pc = env->eip + env->segs[R_CS].base;
1352 #elif defined(TARGET_PPC)
1353 info->value->has_nip = true;
1354 info->value->nip = env->nip;
1355 #elif defined(TARGET_SPARC)
1356 info->value->has_pc = true;
1357 info->value->pc = env->pc;
1358 info->value->has_npc = true;
1359 info->value->npc = env->npc;
1360 #elif defined(TARGET_MIPS)
1361 info->value->has_PC = true;
1362 info->value->PC = env->active_tc.PC;
1363 #endif
1365 /* XXX: waiting for the qapi to support GSList */
1366 if (!cur_item) {
1367 head = cur_item = info;
1368 } else {
1369 cur_item->next = info;
1370 cur_item = info;
1374 return head;
1377 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1378 bool has_cpu, int64_t cpu_index, Error **errp)
1380 FILE *f;
1381 uint32_t l;
1382 CPUState *cpu;
1383 uint8_t buf[1024];
1385 if (!has_cpu) {
1386 cpu_index = 0;
1389 cpu = qemu_get_cpu(cpu_index);
1390 if (cpu == NULL) {
1391 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1392 "a CPU number");
1393 return;
1396 f = fopen(filename, "wb");
1397 if (!f) {
1398 error_setg_file_open(errp, errno, filename);
1399 return;
1402 while (size != 0) {
1403 l = sizeof(buf);
1404 if (l > size)
1405 l = size;
1406 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1407 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1408 goto exit;
1410 if (fwrite(buf, 1, l, f) != l) {
1411 error_set(errp, QERR_IO_ERROR);
1412 goto exit;
1414 addr += l;
1415 size -= l;
1418 exit:
1419 fclose(f);
1422 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1423 Error **errp)
1425 FILE *f;
1426 uint32_t l;
1427 uint8_t buf[1024];
1429 f = fopen(filename, "wb");
1430 if (!f) {
1431 error_setg_file_open(errp, errno, filename);
1432 return;
1435 while (size != 0) {
1436 l = sizeof(buf);
1437 if (l > size)
1438 l = size;
1439 cpu_physical_memory_rw(addr, buf, l, 0);
1440 if (fwrite(buf, 1, l, f) != l) {
1441 error_set(errp, QERR_IO_ERROR);
1442 goto exit;
1444 addr += l;
1445 size -= l;
1448 exit:
1449 fclose(f);
1452 void qmp_inject_nmi(Error **errp)
1454 #if defined(TARGET_I386)
1455 CPUState *cs;
1457 CPU_FOREACH(cs) {
1458 X86CPU *cpu = X86_CPU(cs);
1459 CPUX86State *env = &cpu->env;
1461 if (!env->apic_state) {
1462 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1463 } else {
1464 apic_deliver_nmi(env->apic_state);
1467 #elif defined(TARGET_S390X)
1468 CPUState *cs;
1469 S390CPU *cpu;
1471 CPU_FOREACH(cs) {
1472 cpu = S390_CPU(cs);
1473 if (cpu->env.cpu_num == monitor_get_cpu_index()) {
1474 if (s390_cpu_restart(S390_CPU(cs)) == -1) {
1475 error_set(errp, QERR_UNSUPPORTED);
1476 return;
1478 break;
1481 #else
1482 error_set(errp, QERR_UNSUPPORTED);
1483 #endif