qcow2: Avoid overflow in alloc_clusters_noref()
[qemu/qmp-unstable.git] / cpus.c
blob7bbe15348c611feb7698691fe4819ded56889f7f
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "sysemu/sysemu.h"
30 #include "exec/gdbstub.h"
31 #include "sysemu/dma.h"
32 #include "sysemu/kvm.h"
33 #include "qmp-commands.h"
35 #include "qemu/thread.h"
36 #include "sysemu/cpus.h"
37 #include "sysemu/qtest.h"
38 #include "qemu/main-loop.h"
39 #include "qemu/bitmap.h"
40 #include "qemu/seqlock.h"
42 #ifndef _WIN32
43 #include "qemu/compatfd.h"
44 #endif
46 #ifdef CONFIG_LINUX
48 #include <sys/prctl.h>
50 #ifndef PR_MCE_KILL
51 #define PR_MCE_KILL 33
52 #endif
54 #ifndef PR_MCE_KILL_SET
55 #define PR_MCE_KILL_SET 1
56 #endif
58 #ifndef PR_MCE_KILL_EARLY
59 #define PR_MCE_KILL_EARLY 1
60 #endif
62 #endif /* CONFIG_LINUX */
/* Round-robin cursor used by tcg_exec_all(): the CPU at which the next pass
 * over the CPU list resumes.  NULL means "start again at first_cpu".
 */
64 static CPUState *next_cpu;
66 bool cpu_is_stopped(CPUState *cpu)
68 return cpu->stopped || !runstate_is_running();
71 static bool cpu_thread_is_idle(CPUState *cpu)
73 if (cpu->stop || cpu->queued_work_first) {
74 return false;
76 if (cpu_is_stopped(cpu)) {
77 return true;
79 if (!cpu->halted || cpu_has_work(cpu) ||
80 kvm_halt_in_kernel()) {
81 return false;
83 return true;
86 static bool all_cpu_threads_idle(void)
88 CPUState *cpu;
90 CPU_FOREACH(cpu) {
91 if (!cpu_thread_is_idle(cpu)) {
92 return false;
95 return true;
98 /***********************************************************/
99 /* guest cycle counter */
101 /* Protected by TimersState seqlock */
103 /* Compensate for varying guest execution speed. */
104 static int64_t qemu_icount_bias;
/* QEMU_CLOCK_REALTIME timestamp (ns) recorded by qemu_clock_warp() when a
 * warp starts; -1 means no warp is pending (see icount_warp_rt()).
 */
105 static int64_t vm_clock_warp_start;
106 /* Conversion factor from emulated instructions to virtual clock ticks. */
/* Used as a left-shift count: virtual time = bias + (icount << shift). */
107 static int icount_time_shift;
108 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
/* Upper bound applied by icount_adjust() when raising icount_time_shift. */
109 #define MAX_ICOUNT_SHIFT 10
111 /* Only written by TCG thread */
112 static int64_t qemu_icount;
/* Timers created in configure_icount():
 *  - icount_rt_timer:   QEMU_CLOCK_REALTIME, fires icount_adjust_rt()
 *  - icount_vm_timer:   QEMU_CLOCK_VIRTUAL,  fires icount_adjust_vm()
 *  - icount_warp_timer: QEMU_CLOCK_REALTIME, fires icount_warp_rt()
 * The first two exist only in adaptive ("auto") mode.
 */
114 static QEMUTimer *icount_rt_timer;
115 static QEMUTimer *icount_vm_timer;
116 static QEMUTimer *icount_warp_timer;
118 typedef struct TimersState {
119 /* Protected by BQL. */
120 int64_t cpu_ticks_prev;
121 int64_t cpu_ticks_offset;
123 /* cpu_clock_offset can be read out of BQL, so protect it with
124 * this lock.
126 QemuSeqLock vm_clock_seqlock;
127 int64_t cpu_clock_offset;
128 int32_t cpu_ticks_enabled;
129 int64_t dummy;
130 } TimersState;
132 static TimersState timers_state;
134 /* Return the virtual CPU time, based on the instruction counter. */
135 static int64_t cpu_get_icount_locked(void)
137 int64_t icount;
138 CPUState *cpu = current_cpu;
140 icount = qemu_icount;
141 if (cpu) {
142 if (!cpu_can_do_io(cpu)) {
143 fprintf(stderr, "Bad clock read\n");
145 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
147 return qemu_icount_bias + (icount << icount_time_shift);
150 int64_t cpu_get_icount(void)
152 int64_t icount;
153 unsigned start;
155 do {
156 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
157 icount = cpu_get_icount_locked();
158 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
160 return icount;
163 /* return the host CPU cycle counter and handle stop/restart */
164 /* Caller must hold the BQL */
165 int64_t cpu_get_ticks(void)
167 int64_t ticks;
169 if (use_icount) {
170 return cpu_get_icount();
173 ticks = timers_state.cpu_ticks_offset;
174 if (timers_state.cpu_ticks_enabled) {
175 ticks += cpu_get_real_ticks();
178 if (timers_state.cpu_ticks_prev > ticks) {
179 /* Note: non increasing ticks may happen if the host uses
180 software suspend */
181 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
182 ticks = timers_state.cpu_ticks_prev;
185 timers_state.cpu_ticks_prev = ticks;
186 return ticks;
189 static int64_t cpu_get_clock_locked(void)
191 int64_t ticks;
193 ticks = timers_state.cpu_clock_offset;
194 if (timers_state.cpu_ticks_enabled) {
195 ticks += get_clock();
198 return ticks;
201 /* return the host CPU monotonic timer and handle stop/restart */
202 int64_t cpu_get_clock(void)
204 int64_t ti;
205 unsigned start;
207 do {
208 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
209 ti = cpu_get_clock_locked();
210 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
212 return ti;
215 /* enable cpu_get_ticks()
216 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
218 void cpu_enable_ticks(void)
220 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
221 seqlock_write_lock(&timers_state.vm_clock_seqlock);
222 if (!timers_state.cpu_ticks_enabled) {
223 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
224 timers_state.cpu_clock_offset -= get_clock();
225 timers_state.cpu_ticks_enabled = 1;
227 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
230 /* disable cpu_get_ticks() : the clock is stopped. You must not call
231 * cpu_get_ticks() after that.
232 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
234 void cpu_disable_ticks(void)
236 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
237 seqlock_write_lock(&timers_state.vm_clock_seqlock);
238 if (timers_state.cpu_ticks_enabled) {
239 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
240 timers_state.cpu_clock_offset = cpu_get_clock_locked();
241 timers_state.cpu_ticks_enabled = 0;
243 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
246 /* Correlation between real and virtual time is always going to be
247 fairly approximate, so ignore small variation.
248 When the guest is idle real and virtual time will be aligned in
249 the IO wait loop. */
250 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
252 static void icount_adjust(void)
254 int64_t cur_time;
255 int64_t cur_icount;
256 int64_t delta;
258 /* Protected by TimersState mutex. */
259 static int64_t last_delta;
261 /* If the VM is not running, then do nothing. */
262 if (!runstate_is_running()) {
263 return;
266 seqlock_write_lock(&timers_state.vm_clock_seqlock);
267 cur_time = cpu_get_clock_locked();
268 cur_icount = cpu_get_icount_locked();
270 delta = cur_icount - cur_time;
271 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
272 if (delta > 0
273 && last_delta + ICOUNT_WOBBLE < delta * 2
274 && icount_time_shift > 0) {
275 /* The guest is getting too far ahead. Slow time down. */
276 icount_time_shift--;
278 if (delta < 0
279 && last_delta - ICOUNT_WOBBLE > delta * 2
280 && icount_time_shift < MAX_ICOUNT_SHIFT) {
281 /* The guest is getting too far behind. Speed time up. */
282 icount_time_shift++;
284 last_delta = delta;
285 qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
286 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
289 static void icount_adjust_rt(void *opaque)
291 timer_mod(icount_rt_timer,
292 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
293 icount_adjust();
296 static void icount_adjust_vm(void *opaque)
298 timer_mod(icount_vm_timer,
299 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
300 get_ticks_per_sec() / 10);
301 icount_adjust();
304 static int64_t qemu_icount_round(int64_t count)
306 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
309 static void icount_warp_rt(void *opaque)
311 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
312 * changes from -1 to another value, so the race here is okay.
314 if (atomic_read(&vm_clock_warp_start) == -1) {
315 return;
318 seqlock_write_lock(&timers_state.vm_clock_seqlock);
319 if (runstate_is_running()) {
320 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
321 int64_t warp_delta;
323 warp_delta = clock - vm_clock_warp_start;
324 if (use_icount == 2) {
326 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
327 * far ahead of real time.
329 int64_t cur_time = cpu_get_clock_locked();
330 int64_t cur_icount = cpu_get_icount_locked();
331 int64_t delta = cur_time - cur_icount;
332 warp_delta = MIN(warp_delta, delta);
334 qemu_icount_bias += warp_delta;
336 vm_clock_warp_start = -1;
337 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
339 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
340 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
344 void qtest_clock_warp(int64_t dest)
346 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
347 assert(qtest_enabled());
348 while (clock < dest) {
349 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
350 int64_t warp = MIN(dest - clock, deadline);
351 seqlock_write_lock(&timers_state.vm_clock_seqlock);
352 qemu_icount_bias += warp;
353 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
355 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
356 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
358 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
361 void qemu_clock_warp(QEMUClockType type)
363 int64_t clock;
364 int64_t deadline;
367 * There are too many global variables to make the "warp" behavior
368 * applicable to other clocks. But a clock argument removes the
369 * need for if statements all over the place.
371 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
372 return;
376 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
377 * This ensures that the deadline for the timer is computed correctly below.
378 * This also makes sure that the insn counter is synchronized before the
379 * CPU starts running, in case the CPU is woken by an event other than
380 * the earliest QEMU_CLOCK_VIRTUAL timer.
382 icount_warp_rt(NULL);
383 timer_del(icount_warp_timer);
384 if (!all_cpu_threads_idle()) {
385 return;
388 if (qtest_enabled()) {
389 /* When testing, qtest commands advance icount. */
390 return;
393 /* We want to use the earliest deadline from ALL vm_clocks */
394 clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
395 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
396 if (deadline < 0) {
397 return;
400 if (deadline > 0) {
402 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
403 * sleep. Otherwise, the CPU might be waiting for a future timer
404 * interrupt to wake it up, but the interrupt never comes because
405 * the vCPU isn't running any insns and thus doesn't advance the
406 * QEMU_CLOCK_VIRTUAL.
408 * An extreme solution for this problem would be to never let VCPUs
409 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
410 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
411 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
412 * after some e"real" time, (related to the time left until the next
413 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
414 * This avoids that the warps are visible externally; for example,
415 * you will not be sending network packets continuously instead of
416 * every 100ms.
418 seqlock_write_lock(&timers_state.vm_clock_seqlock);
419 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
420 vm_clock_warp_start = clock;
422 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
423 timer_mod_anticipate(icount_warp_timer, clock + deadline);
424 } else if (deadline == 0) {
425 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
429 static const VMStateDescription vmstate_timers = {
430 .name = "timer",
431 .version_id = 2,
432 .minimum_version_id = 1,
433 .minimum_version_id_old = 1,
434 .fields = (VMStateField[]) {
435 VMSTATE_INT64(cpu_ticks_offset, TimersState),
436 VMSTATE_INT64(dummy, TimersState),
437 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
438 VMSTATE_END_OF_LIST()
442 void configure_icount(const char *option)
444 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
445 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
446 if (!option) {
447 return;
450 icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
451 icount_warp_rt, NULL);
452 if (strcmp(option, "auto") != 0) {
453 icount_time_shift = strtol(option, NULL, 0);
454 use_icount = 1;
455 return;
458 use_icount = 2;
460 /* 125MIPS seems a reasonable initial guess at the guest speed.
461 It will be corrected fairly quickly anyway. */
462 icount_time_shift = 3;
464 /* Have both realtime and virtual time triggers for speed adjustment.
465 The realtime trigger catches emulated time passing too slowly,
466 the virtual time trigger catches emulated time passing too fast.
467 Realtime triggers occur even when idle, so use them less frequently
468 than VM triggers. */
469 icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
470 icount_adjust_rt, NULL);
471 timer_mod(icount_rt_timer,
472 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
473 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
474 icount_adjust_vm, NULL);
475 timer_mod(icount_vm_timer,
476 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
477 get_ticks_per_sec() / 10);
480 /***********************************************************/
481 void hw_error(const char *fmt, ...)
483 va_list ap;
484 CPUState *cpu;
486 va_start(ap, fmt);
487 fprintf(stderr, "qemu: hardware error: ");
488 vfprintf(stderr, fmt, ap);
489 fprintf(stderr, "\n");
490 CPU_FOREACH(cpu) {
491 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
492 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
494 va_end(ap);
495 abort();
498 void cpu_synchronize_all_states(void)
500 CPUState *cpu;
502 CPU_FOREACH(cpu) {
503 cpu_synchronize_state(cpu);
507 void cpu_synchronize_all_post_reset(void)
509 CPUState *cpu;
511 CPU_FOREACH(cpu) {
512 cpu_synchronize_post_reset(cpu);
516 void cpu_synchronize_all_post_init(void)
518 CPUState *cpu;
520 CPU_FOREACH(cpu) {
521 cpu_synchronize_post_init(cpu);
525 static int do_vm_stop(RunState state)
527 int ret = 0;
529 if (runstate_is_running()) {
530 cpu_disable_ticks();
531 pause_all_vcpus();
532 runstate_set(state);
533 vm_state_notify(0, state);
534 monitor_protocol_event(QEVENT_STOP, NULL);
537 bdrv_drain_all();
538 ret = bdrv_flush_all();
540 return ret;
543 static bool cpu_can_run(CPUState *cpu)
545 if (cpu->stop) {
546 return false;
548 if (cpu_is_stopped(cpu)) {
549 return false;
551 return true;
554 static void cpu_handle_guest_debug(CPUState *cpu)
556 gdb_set_stop_cpu(cpu);
557 qemu_system_debug_request();
558 cpu->stopped = true;
561 static void cpu_signal(int sig)
563 if (current_cpu) {
564 cpu_exit(current_cpu);
566 exit_request = 1;
569 #ifdef CONFIG_LINUX
570 static void sigbus_reraise(void)
572 sigset_t set;
573 struct sigaction action;
575 memset(&action, 0, sizeof(action));
576 action.sa_handler = SIG_DFL;
577 if (!sigaction(SIGBUS, &action, NULL)) {
578 raise(SIGBUS);
579 sigemptyset(&set);
580 sigaddset(&set, SIGBUS);
581 sigprocmask(SIG_UNBLOCK, &set, NULL);
583 perror("Failed to re-raise SIGBUS!\n");
584 abort();
587 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
588 void *ctx)
590 if (kvm_on_sigbus(siginfo->ssi_code,
591 (void *)(intptr_t)siginfo->ssi_addr)) {
592 sigbus_reraise();
596 static void qemu_init_sigbus(void)
598 struct sigaction action;
600 memset(&action, 0, sizeof(action));
601 action.sa_flags = SA_SIGINFO;
602 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
603 sigaction(SIGBUS, &action, NULL);
605 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
608 static void qemu_kvm_eat_signals(CPUState *cpu)
610 struct timespec ts = { 0, 0 };
611 siginfo_t siginfo;
612 sigset_t waitset;
613 sigset_t chkset;
614 int r;
616 sigemptyset(&waitset);
617 sigaddset(&waitset, SIG_IPI);
618 sigaddset(&waitset, SIGBUS);
620 do {
621 r = sigtimedwait(&waitset, &siginfo, &ts);
622 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
623 perror("sigtimedwait");
624 exit(1);
627 switch (r) {
628 case SIGBUS:
629 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
630 sigbus_reraise();
632 break;
633 default:
634 break;
637 r = sigpending(&chkset);
638 if (r == -1) {
639 perror("sigpending");
640 exit(1);
642 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
645 #else /* !CONFIG_LINUX */
/* Non-Linux build: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
}
651 static void qemu_kvm_eat_signals(CPUState *cpu)
654 #endif /* !CONFIG_LINUX */
656 #ifndef _WIN32
/* Empty handler installed for SIG_IPI by qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
}
661 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
663 int r;
664 sigset_t set;
665 struct sigaction sigact;
667 memset(&sigact, 0, sizeof(sigact));
668 sigact.sa_handler = dummy_signal;
669 sigaction(SIG_IPI, &sigact, NULL);
671 pthread_sigmask(SIG_BLOCK, NULL, &set);
672 sigdelset(&set, SIG_IPI);
673 sigdelset(&set, SIGBUS);
674 r = kvm_set_signal_mask(cpu, &set);
675 if (r) {
676 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
677 exit(1);
681 static void qemu_tcg_init_cpu_signals(void)
683 sigset_t set;
684 struct sigaction sigact;
686 memset(&sigact, 0, sizeof(sigact));
687 sigact.sa_handler = cpu_signal;
688 sigaction(SIG_IPI, &sigact, NULL);
690 sigemptyset(&set);
691 sigaddset(&set, SIG_IPI);
692 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
695 #else /* _WIN32 */
696 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
698 abort();
/* Win32 build: no signal setup is performed for TCG. */
static void qemu_tcg_init_cpu_signals(void)
{
}
704 #endif /* _WIN32 */
/* The global mutex taken by qemu_mutex_lock_iothread() and passed to every
 * qemu_cond_wait() in this file.
 */
706 static QemuMutex qemu_global_mutex;
/* Broadcast by qemu_mutex_lock_iothread() once it owns the mutex; the TCG
 * thread waits on it while iothread_requesting_mutex is set.
 */
707 static QemuCond qemu_io_proceeded_cond;
708 static bool iothread_requesting_mutex;
/* Thread that called qemu_init_cpu_loop(). */
710 static QemuThread io_thread;
/* With TCG all CPUs share one thread and one halt condition; both are
 * created for the first CPU in qemu_tcg_init_vcpu() and reused afterwards.
 */
712 static QemuThread *tcg_cpu_thread;
713 static QemuCond *tcg_halt_cond;
715 /* cpu creation */
716 static QemuCond qemu_cpu_cond;
717 /* system init */
/* Signalled when a CPU transitions to stopped; pause_all_vcpus() waits on it. */
718 static QemuCond qemu_pause_cond;
/* Broadcast by flush_queued_work(); run_on_cpu() waits on it. */
719 static QemuCond qemu_work_cond;
721 void qemu_init_cpu_loop(void)
723 qemu_init_sigbus();
724 qemu_cond_init(&qemu_cpu_cond);
725 qemu_cond_init(&qemu_pause_cond);
726 qemu_cond_init(&qemu_work_cond);
727 qemu_cond_init(&qemu_io_proceeded_cond);
728 qemu_mutex_init(&qemu_global_mutex);
730 qemu_thread_get_self(&io_thread);
733 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
735 struct qemu_work_item wi;
737 if (qemu_cpu_is_self(cpu)) {
738 func(data);
739 return;
742 wi.func = func;
743 wi.data = data;
744 wi.free = false;
745 if (cpu->queued_work_first == NULL) {
746 cpu->queued_work_first = &wi;
747 } else {
748 cpu->queued_work_last->next = &wi;
750 cpu->queued_work_last = &wi;
751 wi.next = NULL;
752 wi.done = false;
754 qemu_cpu_kick(cpu);
755 while (!wi.done) {
756 CPUState *self_cpu = current_cpu;
758 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
759 current_cpu = self_cpu;
763 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
765 struct qemu_work_item *wi;
767 if (qemu_cpu_is_self(cpu)) {
768 func(data);
769 return;
772 wi = g_malloc0(sizeof(struct qemu_work_item));
773 wi->func = func;
774 wi->data = data;
775 wi->free = true;
776 if (cpu->queued_work_first == NULL) {
777 cpu->queued_work_first = wi;
778 } else {
779 cpu->queued_work_last->next = wi;
781 cpu->queued_work_last = wi;
782 wi->next = NULL;
783 wi->done = false;
785 qemu_cpu_kick(cpu);
788 static void flush_queued_work(CPUState *cpu)
790 struct qemu_work_item *wi;
792 if (cpu->queued_work_first == NULL) {
793 return;
796 while ((wi = cpu->queued_work_first)) {
797 cpu->queued_work_first = wi->next;
798 wi->func(wi->data);
799 wi->done = true;
800 if (wi->free) {
801 g_free(wi);
804 cpu->queued_work_last = NULL;
805 qemu_cond_broadcast(&qemu_work_cond);
808 static void qemu_wait_io_event_common(CPUState *cpu)
810 if (cpu->stop) {
811 cpu->stop = false;
812 cpu->stopped = true;
813 qemu_cond_signal(&qemu_pause_cond);
815 flush_queued_work(cpu);
816 cpu->thread_kicked = false;
819 static void qemu_tcg_wait_io_event(void)
821 CPUState *cpu;
823 while (all_cpu_threads_idle()) {
824 /* Start accounting real time to the virtual clock if the CPUs
825 are idle. */
826 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
827 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
830 while (iothread_requesting_mutex) {
831 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
834 CPU_FOREACH(cpu) {
835 qemu_wait_io_event_common(cpu);
839 static void qemu_kvm_wait_io_event(CPUState *cpu)
841 while (cpu_thread_is_idle(cpu)) {
842 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
845 qemu_kvm_eat_signals(cpu);
846 qemu_wait_io_event_common(cpu);
849 static void *qemu_kvm_cpu_thread_fn(void *arg)
851 CPUState *cpu = arg;
852 int r;
854 qemu_mutex_lock(&qemu_global_mutex);
855 qemu_thread_get_self(cpu->thread);
856 cpu->thread_id = qemu_get_thread_id();
857 current_cpu = cpu;
859 r = kvm_init_vcpu(cpu);
860 if (r < 0) {
861 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
862 exit(1);
865 qemu_kvm_init_cpu_signals(cpu);
867 /* signal CPU creation */
868 cpu->created = true;
869 qemu_cond_signal(&qemu_cpu_cond);
871 while (1) {
872 if (cpu_can_run(cpu)) {
873 r = kvm_cpu_exec(cpu);
874 if (r == EXCP_DEBUG) {
875 cpu_handle_guest_debug(cpu);
878 qemu_kvm_wait_io_event(cpu);
881 return NULL;
884 static void *qemu_dummy_cpu_thread_fn(void *arg)
886 #ifdef _WIN32
887 fprintf(stderr, "qtest is not supported under Windows\n");
888 exit(1);
889 #else
890 CPUState *cpu = arg;
891 sigset_t waitset;
892 int r;
894 qemu_mutex_lock_iothread();
895 qemu_thread_get_self(cpu->thread);
896 cpu->thread_id = qemu_get_thread_id();
898 sigemptyset(&waitset);
899 sigaddset(&waitset, SIG_IPI);
901 /* signal CPU creation */
902 cpu->created = true;
903 qemu_cond_signal(&qemu_cpu_cond);
905 current_cpu = cpu;
906 while (1) {
907 current_cpu = NULL;
908 qemu_mutex_unlock_iothread();
909 do {
910 int sig;
911 r = sigwait(&waitset, &sig);
912 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
913 if (r == -1) {
914 perror("sigwait");
915 exit(1);
917 qemu_mutex_lock_iothread();
918 current_cpu = cpu;
919 qemu_wait_io_event_common(cpu);
922 return NULL;
923 #endif
926 static void tcg_exec_all(void);
928 static void *qemu_tcg_cpu_thread_fn(void *arg)
930 CPUState *cpu = arg;
932 qemu_tcg_init_cpu_signals();
933 qemu_thread_get_self(cpu->thread);
935 qemu_mutex_lock(&qemu_global_mutex);
936 CPU_FOREACH(cpu) {
937 cpu->thread_id = qemu_get_thread_id();
938 cpu->created = true;
940 qemu_cond_signal(&qemu_cpu_cond);
942 /* wait for initial kick-off after machine start */
943 while (QTAILQ_FIRST(&cpus)->stopped) {
944 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
946 /* process any pending work */
947 CPU_FOREACH(cpu) {
948 qemu_wait_io_event_common(cpu);
952 while (1) {
953 tcg_exec_all();
955 if (use_icount) {
956 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
958 if (deadline == 0) {
959 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
962 qemu_tcg_wait_io_event();
965 return NULL;
968 static void qemu_cpu_kick_thread(CPUState *cpu)
970 #ifndef _WIN32
971 int err;
973 err = pthread_kill(cpu->thread->thread, SIG_IPI);
974 if (err) {
975 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
976 exit(1);
978 #else /* _WIN32 */
979 if (!qemu_cpu_is_self(cpu)) {
980 CONTEXT tcgContext;
982 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
983 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
984 GetLastError());
985 exit(1);
988 /* On multi-core systems, we are not sure that the thread is actually
989 * suspended until we can get the context.
991 tcgContext.ContextFlags = CONTEXT_CONTROL;
992 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
993 continue;
996 cpu_signal(0);
998 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
999 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
1000 GetLastError());
1001 exit(1);
1004 #endif
1007 void qemu_cpu_kick(CPUState *cpu)
1009 qemu_cond_broadcast(cpu->halt_cond);
1010 if (!tcg_enabled() && !cpu->thread_kicked) {
1011 qemu_cpu_kick_thread(cpu);
1012 cpu->thread_kicked = true;
1016 void qemu_cpu_kick_self(void)
1018 #ifndef _WIN32
1019 assert(current_cpu);
1021 if (!current_cpu->thread_kicked) {
1022 qemu_cpu_kick_thread(current_cpu);
1023 current_cpu->thread_kicked = true;
1025 #else
1026 abort();
1027 #endif
1030 bool qemu_cpu_is_self(CPUState *cpu)
1032 return qemu_thread_is_self(cpu->thread);
1035 static bool qemu_in_vcpu_thread(void)
1037 return current_cpu && qemu_cpu_is_self(current_cpu);
1040 void qemu_mutex_lock_iothread(void)
1042 if (!tcg_enabled()) {
1043 qemu_mutex_lock(&qemu_global_mutex);
1044 } else {
1045 iothread_requesting_mutex = true;
1046 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1047 qemu_cpu_kick_thread(first_cpu);
1048 qemu_mutex_lock(&qemu_global_mutex);
1050 iothread_requesting_mutex = false;
1051 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1055 void qemu_mutex_unlock_iothread(void)
1057 qemu_mutex_unlock(&qemu_global_mutex);
1060 static int all_vcpus_paused(void)
1062 CPUState *cpu;
1064 CPU_FOREACH(cpu) {
1065 if (!cpu->stopped) {
1066 return 0;
1070 return 1;
1073 void pause_all_vcpus(void)
1075 CPUState *cpu;
1077 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1078 CPU_FOREACH(cpu) {
1079 cpu->stop = true;
1080 qemu_cpu_kick(cpu);
1083 if (qemu_in_vcpu_thread()) {
1084 cpu_stop_current();
1085 if (!kvm_enabled()) {
1086 CPU_FOREACH(cpu) {
1087 cpu->stop = false;
1088 cpu->stopped = true;
1090 return;
1094 while (!all_vcpus_paused()) {
1095 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1096 CPU_FOREACH(cpu) {
1097 qemu_cpu_kick(cpu);
1102 void cpu_resume(CPUState *cpu)
1104 cpu->stop = false;
1105 cpu->stopped = false;
1106 qemu_cpu_kick(cpu);
1109 void resume_all_vcpus(void)
1111 CPUState *cpu;
1113 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1114 CPU_FOREACH(cpu) {
1115 cpu_resume(cpu);
1119 /* For temporary buffers for forming a name */
1120 #define VCPU_THREAD_NAME_SIZE 16
1122 static void qemu_tcg_init_vcpu(CPUState *cpu)
1124 char thread_name[VCPU_THREAD_NAME_SIZE];
1126 tcg_cpu_address_space_init(cpu, cpu->as);
1128 /* share a single thread for all cpus with TCG */
1129 if (!tcg_cpu_thread) {
1130 cpu->thread = g_malloc0(sizeof(QemuThread));
1131 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1132 qemu_cond_init(cpu->halt_cond);
1133 tcg_halt_cond = cpu->halt_cond;
1134 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1135 cpu->cpu_index);
1136 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1137 cpu, QEMU_THREAD_JOINABLE);
1138 #ifdef _WIN32
1139 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1140 #endif
1141 while (!cpu->created) {
1142 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1144 tcg_cpu_thread = cpu->thread;
1145 } else {
1146 cpu->thread = tcg_cpu_thread;
1147 cpu->halt_cond = tcg_halt_cond;
1151 static void qemu_kvm_start_vcpu(CPUState *cpu)
1153 char thread_name[VCPU_THREAD_NAME_SIZE];
1155 cpu->thread = g_malloc0(sizeof(QemuThread));
1156 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1157 qemu_cond_init(cpu->halt_cond);
1158 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1159 cpu->cpu_index);
1160 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1161 cpu, QEMU_THREAD_JOINABLE);
1162 while (!cpu->created) {
1163 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1167 static void qemu_dummy_start_vcpu(CPUState *cpu)
1169 char thread_name[VCPU_THREAD_NAME_SIZE];
1171 cpu->thread = g_malloc0(sizeof(QemuThread));
1172 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1173 qemu_cond_init(cpu->halt_cond);
1174 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1175 cpu->cpu_index);
1176 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1177 QEMU_THREAD_JOINABLE);
1178 while (!cpu->created) {
1179 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1183 void qemu_init_vcpu(CPUState *cpu)
1185 cpu->nr_cores = smp_cores;
1186 cpu->nr_threads = smp_threads;
1187 cpu->stopped = true;
1188 if (kvm_enabled()) {
1189 qemu_kvm_start_vcpu(cpu);
1190 } else if (tcg_enabled()) {
1191 qemu_tcg_init_vcpu(cpu);
1192 } else {
1193 qemu_dummy_start_vcpu(cpu);
1197 void cpu_stop_current(void)
1199 if (current_cpu) {
1200 current_cpu->stop = false;
1201 current_cpu->stopped = true;
1202 cpu_exit(current_cpu);
1203 qemu_cond_signal(&qemu_pause_cond);
1207 int vm_stop(RunState state)
1209 if (qemu_in_vcpu_thread()) {
1210 qemu_system_vmstop_request(state);
1212 * FIXME: should not return to device code in case
1213 * vm_stop() has been requested.
1215 cpu_stop_current();
1216 return 0;
1219 return do_vm_stop(state);
1222 /* does a state transition even if the VM is already stopped,
1223 current state is forgotten forever */
1224 int vm_stop_force_state(RunState state)
1226 if (runstate_is_running()) {
1227 return vm_stop(state);
1228 } else {
1229 runstate_set(state);
1230 /* Make sure to return an error if the flush in a previous vm_stop()
1231 * failed. */
1232 return bdrv_flush_all();
1236 static int tcg_cpu_exec(CPUArchState *env)
1238 CPUState *cpu = ENV_GET_CPU(env);
1239 int ret;
1240 #ifdef CONFIG_PROFILER
1241 int64_t ti;
1242 #endif
1244 #ifdef CONFIG_PROFILER
1245 ti = profile_getclock();
1246 #endif
1247 if (use_icount) {
1248 int64_t count;
1249 int64_t deadline;
1250 int decr;
1251 qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
1252 cpu->icount_decr.u16.low = 0;
1253 cpu->icount_extra = 0;
1254 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1256 /* Maintain prior (possibly buggy) behaviour where if no deadline
1257 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1258 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1259 * nanoseconds.
1261 if ((deadline < 0) || (deadline > INT32_MAX)) {
1262 deadline = INT32_MAX;
1265 count = qemu_icount_round(deadline);
1266 qemu_icount += count;
1267 decr = (count > 0xffff) ? 0xffff : count;
1268 count -= decr;
1269 cpu->icount_decr.u16.low = decr;
1270 cpu->icount_extra = count;
1272 ret = cpu_exec(env);
1273 #ifdef CONFIG_PROFILER
1274 qemu_time += profile_getclock() - ti;
1275 #endif
1276 if (use_icount) {
1277 /* Fold pending instructions back into the
1278 instruction counter, and clear the interrupt flag. */
1279 qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
1280 cpu->icount_decr.u32 = 0;
1281 cpu->icount_extra = 0;
1283 return ret;
1286 static void tcg_exec_all(void)
1288 int r;
1290 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1291 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
1293 if (next_cpu == NULL) {
1294 next_cpu = first_cpu;
1296 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1297 CPUState *cpu = next_cpu;
1298 CPUArchState *env = cpu->env_ptr;
1300 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1301 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1303 if (cpu_can_run(cpu)) {
1304 r = tcg_cpu_exec(env);
1305 if (r == EXCP_DEBUG) {
1306 cpu_handle_guest_debug(cpu);
1307 break;
1309 } else if (cpu->stop || cpu->stopped) {
1310 break;
1313 exit_request = 0;
1316 void set_numa_modes(void)
1318 CPUState *cpu;
1319 int i;
1321 CPU_FOREACH(cpu) {
1322 for (i = 0; i < nb_numa_nodes; i++) {
1323 if (test_bit(cpu->cpu_index, node_cpumask[i])) {
1324 cpu->numa_node = i;
1330 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1332 /* XXX: implement xxx_cpu_list for targets that still miss it */
1333 #if defined(cpu_list)
1334 cpu_list(f, cpu_fprintf);
1335 #endif
1338 CpuInfoList *qmp_query_cpus(Error **errp)
1340 CpuInfoList *head = NULL, *cur_item = NULL;
1341 CPUState *cpu;
1343 CPU_FOREACH(cpu) {
1344 CpuInfoList *info;
1345 #if defined(TARGET_I386)
1346 X86CPU *x86_cpu = X86_CPU(cpu);
1347 CPUX86State *env = &x86_cpu->env;
1348 #elif defined(TARGET_PPC)
1349 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1350 CPUPPCState *env = &ppc_cpu->env;
1351 #elif defined(TARGET_SPARC)
1352 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1353 CPUSPARCState *env = &sparc_cpu->env;
1354 #elif defined(TARGET_MIPS)
1355 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1356 CPUMIPSState *env = &mips_cpu->env;
1357 #endif
1359 cpu_synchronize_state(cpu);
1361 info = g_malloc0(sizeof(*info));
1362 info->value = g_malloc0(sizeof(*info->value));
1363 info->value->CPU = cpu->cpu_index;
1364 info->value->current = (cpu == first_cpu);
1365 info->value->halted = cpu->halted;
1366 info->value->thread_id = cpu->thread_id;
1367 #if defined(TARGET_I386)
1368 info->value->has_pc = true;
1369 info->value->pc = env->eip + env->segs[R_CS].base;
1370 #elif defined(TARGET_PPC)
1371 info->value->has_nip = true;
1372 info->value->nip = env->nip;
1373 #elif defined(TARGET_SPARC)
1374 info->value->has_pc = true;
1375 info->value->pc = env->pc;
1376 info->value->has_npc = true;
1377 info->value->npc = env->npc;
1378 #elif defined(TARGET_MIPS)
1379 info->value->has_PC = true;
1380 info->value->PC = env->active_tc.PC;
1381 #endif
1383 /* XXX: waiting for the qapi to support GSList */
1384 if (!cur_item) {
1385 head = cur_item = info;
1386 } else {
1387 cur_item->next = info;
1388 cur_item = info;
1392 return head;
1395 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1396 bool has_cpu, int64_t cpu_index, Error **errp)
1398 FILE *f;
1399 uint32_t l;
1400 CPUState *cpu;
1401 uint8_t buf[1024];
1403 if (!has_cpu) {
1404 cpu_index = 0;
1407 cpu = qemu_get_cpu(cpu_index);
1408 if (cpu == NULL) {
1409 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1410 "a CPU number");
1411 return;
1414 f = fopen(filename, "wb");
1415 if (!f) {
1416 error_setg_file_open(errp, errno, filename);
1417 return;
1420 while (size != 0) {
1421 l = sizeof(buf);
1422 if (l > size)
1423 l = size;
1424 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1425 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1426 goto exit;
1428 if (fwrite(buf, 1, l, f) != l) {
1429 error_set(errp, QERR_IO_ERROR);
1430 goto exit;
1432 addr += l;
1433 size -= l;
1436 exit:
1437 fclose(f);
1440 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1441 Error **errp)
1443 FILE *f;
1444 uint32_t l;
1445 uint8_t buf[1024];
1447 f = fopen(filename, "wb");
1448 if (!f) {
1449 error_setg_file_open(errp, errno, filename);
1450 return;
1453 while (size != 0) {
1454 l = sizeof(buf);
1455 if (l > size)
1456 l = size;
1457 cpu_physical_memory_read(addr, buf, l);
1458 if (fwrite(buf, 1, l, f) != l) {
1459 error_set(errp, QERR_IO_ERROR);
1460 goto exit;
1462 addr += l;
1463 size -= l;
1466 exit:
1467 fclose(f);
1470 void qmp_inject_nmi(Error **errp)
1472 #if defined(TARGET_I386)
1473 CPUState *cs;
1475 CPU_FOREACH(cs) {
1476 X86CPU *cpu = X86_CPU(cs);
1478 if (!cpu->apic_state) {
1479 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1480 } else {
1481 apic_deliver_nmi(cpu->apic_state);
1484 #elif defined(TARGET_S390X)
1485 CPUState *cs;
1486 S390CPU *cpu;
1488 CPU_FOREACH(cs) {
1489 cpu = S390_CPU(cs);
1490 if (cpu->env.cpu_num == monitor_get_cpu_index()) {
1491 if (s390_cpu_restart(S390_CPU(cs)) == -1) {
1492 error_set(errp, QERR_UNSUPPORTED);
1493 return;
1495 break;
1498 #else
1499 error_set(errp, QERR_UNSUPPORTED);
1500 #endif