acpi:ich9: convert cpu hotplug to hotplug_handler API
[qemu/rayw.git] / cpus.c
blob0c33458bb18eef7770ab916582c22a4013bad18f
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "sysemu/sysemu.h"
31 #include "exec/gdbstub.h"
32 #include "sysemu/dma.h"
33 #include "sysemu/kvm.h"
34 #include "qmp-commands.h"
36 #include "qemu/thread.h"
37 #include "sysemu/cpus.h"
38 #include "sysemu/qtest.h"
39 #include "qemu/main-loop.h"
40 #include "qemu/bitmap.h"
41 #include "qemu/seqlock.h"
42 #include "qapi-event.h"
43 #include "hw/nmi.h"
45 #ifndef _WIN32
46 #include "qemu/compatfd.h"
47 #endif
49 #ifdef CONFIG_LINUX
51 #include <sys/prctl.h>
53 #ifndef PR_MCE_KILL
54 #define PR_MCE_KILL 33
55 #endif
57 #ifndef PR_MCE_KILL_SET
58 #define PR_MCE_KILL_SET 1
59 #endif
61 #ifndef PR_MCE_KILL_EARLY
62 #define PR_MCE_KILL_EARLY 1
63 #endif
65 #endif /* CONFIG_LINUX */
67 static CPUState *next_cpu;
68 int64_t max_delay;
69 int64_t max_advance;
71 bool cpu_is_stopped(CPUState *cpu)
73 return cpu->stopped || !runstate_is_running();
76 static bool cpu_thread_is_idle(CPUState *cpu)
78 if (cpu->stop || cpu->queued_work_first) {
79 return false;
81 if (cpu_is_stopped(cpu)) {
82 return true;
84 if (!cpu->halted || cpu_has_work(cpu) ||
85 kvm_halt_in_kernel()) {
86 return false;
88 return true;
91 static bool all_cpu_threads_idle(void)
93 CPUState *cpu;
95 CPU_FOREACH(cpu) {
96 if (!cpu_thread_is_idle(cpu)) {
97 return false;
100 return true;
103 /***********************************************************/
104 /* guest cycle counter */
106 /* Protected by TimersState seqlock */
108 static int64_t vm_clock_warp_start = -1;
109 /* Conversion factor from emulated instructions to virtual clock ticks. */
110 static int icount_time_shift;
111 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
112 #define MAX_ICOUNT_SHIFT 10
114 static QEMUTimer *icount_rt_timer;
115 static QEMUTimer *icount_vm_timer;
116 static QEMUTimer *icount_warp_timer;
118 typedef struct TimersState {
119 /* Protected by BQL. */
120 int64_t cpu_ticks_prev;
121 int64_t cpu_ticks_offset;
123 /* cpu_clock_offset can be read out of BQL, so protect it with
124 * this lock.
126 QemuSeqLock vm_clock_seqlock;
127 int64_t cpu_clock_offset;
128 int32_t cpu_ticks_enabled;
129 int64_t dummy;
131 /* Compensate for varying guest execution speed. */
132 int64_t qemu_icount_bias;
133 /* Only written by TCG thread */
134 int64_t qemu_icount;
135 } TimersState;
137 static TimersState timers_state;
139 /* Return the virtual CPU time, based on the instruction counter. */
140 static int64_t cpu_get_icount_locked(void)
142 int64_t icount;
143 CPUState *cpu = current_cpu;
145 icount = timers_state.qemu_icount;
146 if (cpu) {
147 if (!cpu_can_do_io(cpu)) {
148 fprintf(stderr, "Bad clock read\n");
150 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
152 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
155 int64_t cpu_get_icount(void)
157 int64_t icount;
158 unsigned start;
160 do {
161 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
162 icount = cpu_get_icount_locked();
163 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
165 return icount;
168 int64_t cpu_icount_to_ns(int64_t icount)
170 return icount << icount_time_shift;
173 /* return the host CPU cycle counter and handle stop/restart */
174 /* Caller must hold the BQL */
175 int64_t cpu_get_ticks(void)
177 int64_t ticks;
179 if (use_icount) {
180 return cpu_get_icount();
183 ticks = timers_state.cpu_ticks_offset;
184 if (timers_state.cpu_ticks_enabled) {
185 ticks += cpu_get_real_ticks();
188 if (timers_state.cpu_ticks_prev > ticks) {
189 /* Note: non increasing ticks may happen if the host uses
190 software suspend */
191 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
192 ticks = timers_state.cpu_ticks_prev;
195 timers_state.cpu_ticks_prev = ticks;
196 return ticks;
199 static int64_t cpu_get_clock_locked(void)
201 int64_t ticks;
203 ticks = timers_state.cpu_clock_offset;
204 if (timers_state.cpu_ticks_enabled) {
205 ticks += get_clock();
208 return ticks;
211 /* return the host CPU monotonic timer and handle stop/restart */
212 int64_t cpu_get_clock(void)
214 int64_t ti;
215 unsigned start;
217 do {
218 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
219 ti = cpu_get_clock_locked();
220 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
222 return ti;
225 /* return the offset between the host clock and virtual CPU clock */
226 int64_t cpu_get_clock_offset(void)
228 int64_t ti;
229 unsigned start;
231 do {
232 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
233 ti = timers_state.cpu_clock_offset;
234 if (!timers_state.cpu_ticks_enabled) {
235 ti -= get_clock();
237 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
239 return -ti;
242 /* enable cpu_get_ticks()
243 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
245 void cpu_enable_ticks(void)
247 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
248 seqlock_write_lock(&timers_state.vm_clock_seqlock);
249 if (!timers_state.cpu_ticks_enabled) {
250 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
251 timers_state.cpu_clock_offset -= get_clock();
252 timers_state.cpu_ticks_enabled = 1;
254 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
257 /* disable cpu_get_ticks() : the clock is stopped. You must not call
258 * cpu_get_ticks() after that.
259 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
261 void cpu_disable_ticks(void)
263 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
264 seqlock_write_lock(&timers_state.vm_clock_seqlock);
265 if (timers_state.cpu_ticks_enabled) {
266 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
267 timers_state.cpu_clock_offset = cpu_get_clock_locked();
268 timers_state.cpu_ticks_enabled = 0;
270 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
273 /* Correlation between real and virtual time is always going to be
274 fairly approximate, so ignore small variation.
275 When the guest is idle real and virtual time will be aligned in
276 the IO wait loop. */
277 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
279 static void icount_adjust(void)
281 int64_t cur_time;
282 int64_t cur_icount;
283 int64_t delta;
285 /* Protected by TimersState mutex. */
286 static int64_t last_delta;
288 /* If the VM is not running, then do nothing. */
289 if (!runstate_is_running()) {
290 return;
293 seqlock_write_lock(&timers_state.vm_clock_seqlock);
294 cur_time = cpu_get_clock_locked();
295 cur_icount = cpu_get_icount_locked();
297 delta = cur_icount - cur_time;
298 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
299 if (delta > 0
300 && last_delta + ICOUNT_WOBBLE < delta * 2
301 && icount_time_shift > 0) {
302 /* The guest is getting too far ahead. Slow time down. */
303 icount_time_shift--;
305 if (delta < 0
306 && last_delta - ICOUNT_WOBBLE > delta * 2
307 && icount_time_shift < MAX_ICOUNT_SHIFT) {
308 /* The guest is getting too far behind. Speed time up. */
309 icount_time_shift++;
311 last_delta = delta;
312 timers_state.qemu_icount_bias = cur_icount
313 - (timers_state.qemu_icount << icount_time_shift);
314 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
317 static void icount_adjust_rt(void *opaque)
319 timer_mod(icount_rt_timer,
320 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
321 icount_adjust();
324 static void icount_adjust_vm(void *opaque)
326 timer_mod(icount_vm_timer,
327 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
328 get_ticks_per_sec() / 10);
329 icount_adjust();
332 static int64_t qemu_icount_round(int64_t count)
334 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
337 static void icount_warp_rt(void *opaque)
339 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
340 * changes from -1 to another value, so the race here is okay.
342 if (atomic_read(&vm_clock_warp_start) == -1) {
343 return;
346 seqlock_write_lock(&timers_state.vm_clock_seqlock);
347 if (runstate_is_running()) {
348 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
349 int64_t warp_delta;
351 warp_delta = clock - vm_clock_warp_start;
352 if (use_icount == 2) {
354 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
355 * far ahead of real time.
357 int64_t cur_time = cpu_get_clock_locked();
358 int64_t cur_icount = cpu_get_icount_locked();
359 int64_t delta = cur_time - cur_icount;
360 warp_delta = MIN(warp_delta, delta);
362 timers_state.qemu_icount_bias += warp_delta;
364 vm_clock_warp_start = -1;
365 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
367 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
368 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
372 void qtest_clock_warp(int64_t dest)
374 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
375 assert(qtest_enabled());
376 while (clock < dest) {
377 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
378 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
379 seqlock_write_lock(&timers_state.vm_clock_seqlock);
380 timers_state.qemu_icount_bias += warp;
381 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
383 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
384 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
386 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
389 void qemu_clock_warp(QEMUClockType type)
391 int64_t clock;
392 int64_t deadline;
395 * There are too many global variables to make the "warp" behavior
396 * applicable to other clocks. But a clock argument removes the
397 * need for if statements all over the place.
399 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
400 return;
404 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
405 * This ensures that the deadline for the timer is computed correctly below.
406 * This also makes sure that the insn counter is synchronized before the
407 * CPU starts running, in case the CPU is woken by an event other than
408 * the earliest QEMU_CLOCK_VIRTUAL timer.
410 icount_warp_rt(NULL);
411 timer_del(icount_warp_timer);
412 if (!all_cpu_threads_idle()) {
413 return;
416 if (qtest_enabled()) {
417 /* When testing, qtest commands advance icount. */
418 return;
421 /* We want to use the earliest deadline from ALL vm_clocks */
422 clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
423 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
424 if (deadline < 0) {
425 return;
428 if (deadline > 0) {
430 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
431 * sleep. Otherwise, the CPU might be waiting for a future timer
432 * interrupt to wake it up, but the interrupt never comes because
433 * the vCPU isn't running any insns and thus doesn't advance the
434 * QEMU_CLOCK_VIRTUAL.
436 * An extreme solution for this problem would be to never let VCPUs
437 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
438 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
439 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
440 * after some e"real" time, (related to the time left until the next
441 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
442 * This avoids that the warps are visible externally; for example,
443 * you will not be sending network packets continuously instead of
444 * every 100ms.
446 seqlock_write_lock(&timers_state.vm_clock_seqlock);
447 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
448 vm_clock_warp_start = clock;
450 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
451 timer_mod_anticipate(icount_warp_timer, clock + deadline);
452 } else if (deadline == 0) {
453 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
457 static bool icount_state_needed(void *opaque)
459 return use_icount;
463 * This is a subsection for icount migration.
465 static const VMStateDescription icount_vmstate_timers = {
466 .name = "timer/icount",
467 .version_id = 1,
468 .minimum_version_id = 1,
469 .fields = (VMStateField[]) {
470 VMSTATE_INT64(qemu_icount_bias, TimersState),
471 VMSTATE_INT64(qemu_icount, TimersState),
472 VMSTATE_END_OF_LIST()
476 static const VMStateDescription vmstate_timers = {
477 .name = "timer",
478 .version_id = 2,
479 .minimum_version_id = 1,
480 .fields = (VMStateField[]) {
481 VMSTATE_INT64(cpu_ticks_offset, TimersState),
482 VMSTATE_INT64(dummy, TimersState),
483 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
484 VMSTATE_END_OF_LIST()
486 .subsections = (VMStateSubsection[]) {
488 .vmsd = &icount_vmstate_timers,
489 .needed = icount_state_needed,
490 }, {
491 /* empty */
496 void cpu_ticks_init(void)
498 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
499 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
502 void configure_icount(QemuOpts *opts, Error **errp)
504 const char *option;
505 char *rem_str = NULL;
507 option = qemu_opt_get(opts, "shift");
508 if (!option) {
509 if (qemu_opt_get(opts, "align") != NULL) {
510 error_setg(errp, "Please specify shift option when using align");
512 return;
514 icount_align_option = qemu_opt_get_bool(opts, "align", false);
515 icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
516 icount_warp_rt, NULL);
517 if (strcmp(option, "auto") != 0) {
518 errno = 0;
519 icount_time_shift = strtol(option, &rem_str, 0);
520 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
521 error_setg(errp, "icount: Invalid shift value");
523 use_icount = 1;
524 return;
525 } else if (icount_align_option) {
526 error_setg(errp, "shift=auto and align=on are incompatible");
529 use_icount = 2;
531 /* 125MIPS seems a reasonable initial guess at the guest speed.
532 It will be corrected fairly quickly anyway. */
533 icount_time_shift = 3;
535 /* Have both realtime and virtual time triggers for speed adjustment.
536 The realtime trigger catches emulated time passing too slowly,
537 the virtual time trigger catches emulated time passing too fast.
538 Realtime triggers occur even when idle, so use them less frequently
539 than VM triggers. */
540 icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
541 icount_adjust_rt, NULL);
542 timer_mod(icount_rt_timer,
543 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
544 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
545 icount_adjust_vm, NULL);
546 timer_mod(icount_vm_timer,
547 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
548 get_ticks_per_sec() / 10);
551 /***********************************************************/
552 void hw_error(const char *fmt, ...)
554 va_list ap;
555 CPUState *cpu;
557 va_start(ap, fmt);
558 fprintf(stderr, "qemu: hardware error: ");
559 vfprintf(stderr, fmt, ap);
560 fprintf(stderr, "\n");
561 CPU_FOREACH(cpu) {
562 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
563 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
565 va_end(ap);
566 abort();
569 void cpu_synchronize_all_states(void)
571 CPUState *cpu;
573 CPU_FOREACH(cpu) {
574 cpu_synchronize_state(cpu);
578 void cpu_synchronize_all_post_reset(void)
580 CPUState *cpu;
582 CPU_FOREACH(cpu) {
583 cpu_synchronize_post_reset(cpu);
587 void cpu_synchronize_all_post_init(void)
589 CPUState *cpu;
591 CPU_FOREACH(cpu) {
592 cpu_synchronize_post_init(cpu);
596 void cpu_clean_all_dirty(void)
598 CPUState *cpu;
600 CPU_FOREACH(cpu) {
601 cpu_clean_state(cpu);
605 static int do_vm_stop(RunState state)
607 int ret = 0;
609 if (runstate_is_running()) {
610 cpu_disable_ticks();
611 pause_all_vcpus();
612 runstate_set(state);
613 vm_state_notify(0, state);
614 qapi_event_send_stop(&error_abort);
617 bdrv_drain_all();
618 ret = bdrv_flush_all();
620 return ret;
623 static bool cpu_can_run(CPUState *cpu)
625 if (cpu->stop) {
626 return false;
628 if (cpu_is_stopped(cpu)) {
629 return false;
631 return true;
634 static void cpu_handle_guest_debug(CPUState *cpu)
636 gdb_set_stop_cpu(cpu);
637 qemu_system_debug_request();
638 cpu->stopped = true;
641 static void cpu_signal(int sig)
643 if (current_cpu) {
644 cpu_exit(current_cpu);
646 exit_request = 1;
649 #ifdef CONFIG_LINUX
650 static void sigbus_reraise(void)
652 sigset_t set;
653 struct sigaction action;
655 memset(&action, 0, sizeof(action));
656 action.sa_handler = SIG_DFL;
657 if (!sigaction(SIGBUS, &action, NULL)) {
658 raise(SIGBUS);
659 sigemptyset(&set);
660 sigaddset(&set, SIGBUS);
661 sigprocmask(SIG_UNBLOCK, &set, NULL);
663 perror("Failed to re-raise SIGBUS!\n");
664 abort();
667 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
668 void *ctx)
670 if (kvm_on_sigbus(siginfo->ssi_code,
671 (void *)(intptr_t)siginfo->ssi_addr)) {
672 sigbus_reraise();
676 static void qemu_init_sigbus(void)
678 struct sigaction action;
680 memset(&action, 0, sizeof(action));
681 action.sa_flags = SA_SIGINFO;
682 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
683 sigaction(SIGBUS, &action, NULL);
685 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
688 static void qemu_kvm_eat_signals(CPUState *cpu)
690 struct timespec ts = { 0, 0 };
691 siginfo_t siginfo;
692 sigset_t waitset;
693 sigset_t chkset;
694 int r;
696 sigemptyset(&waitset);
697 sigaddset(&waitset, SIG_IPI);
698 sigaddset(&waitset, SIGBUS);
700 do {
701 r = sigtimedwait(&waitset, &siginfo, &ts);
702 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
703 perror("sigtimedwait");
704 exit(1);
707 switch (r) {
708 case SIGBUS:
709 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
710 sigbus_reraise();
712 break;
713 default:
714 break;
717 r = sigpending(&chkset);
718 if (r == -1) {
719 perror("sigpending");
720 exit(1);
722 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
725 #else /* !CONFIG_LINUX */
/* Non-Linux hosts: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
}
731 static void qemu_kvm_eat_signals(CPUState *cpu)
734 #endif /* !CONFIG_LINUX */
736 #ifndef _WIN32
/* Intentionally empty handler installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
}
741 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
743 int r;
744 sigset_t set;
745 struct sigaction sigact;
747 memset(&sigact, 0, sizeof(sigact));
748 sigact.sa_handler = dummy_signal;
749 sigaction(SIG_IPI, &sigact, NULL);
751 pthread_sigmask(SIG_BLOCK, NULL, &set);
752 sigdelset(&set, SIG_IPI);
753 sigdelset(&set, SIGBUS);
754 r = kvm_set_signal_mask(cpu, &set);
755 if (r) {
756 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
757 exit(1);
761 static void qemu_tcg_init_cpu_signals(void)
763 sigset_t set;
764 struct sigaction sigact;
766 memset(&sigact, 0, sizeof(sigact));
767 sigact.sa_handler = cpu_signal;
768 sigaction(SIG_IPI, &sigact, NULL);
770 sigemptyset(&set);
771 sigaddset(&set, SIG_IPI);
772 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
775 #else /* _WIN32 */
776 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
778 abort();
/* Win32 build: no signal setup is done for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
}
784 #endif /* _WIN32 */
786 static QemuMutex qemu_global_mutex;
787 static QemuCond qemu_io_proceeded_cond;
788 static bool iothread_requesting_mutex;
790 static QemuThread io_thread;
792 static QemuThread *tcg_cpu_thread;
793 static QemuCond *tcg_halt_cond;
795 /* cpu creation */
796 static QemuCond qemu_cpu_cond;
797 /* system init */
798 static QemuCond qemu_pause_cond;
799 static QemuCond qemu_work_cond;
801 void qemu_init_cpu_loop(void)
803 qemu_init_sigbus();
804 qemu_cond_init(&qemu_cpu_cond);
805 qemu_cond_init(&qemu_pause_cond);
806 qemu_cond_init(&qemu_work_cond);
807 qemu_cond_init(&qemu_io_proceeded_cond);
808 qemu_mutex_init(&qemu_global_mutex);
810 qemu_thread_get_self(&io_thread);
813 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
815 struct qemu_work_item wi;
817 if (qemu_cpu_is_self(cpu)) {
818 func(data);
819 return;
822 wi.func = func;
823 wi.data = data;
824 wi.free = false;
825 if (cpu->queued_work_first == NULL) {
826 cpu->queued_work_first = &wi;
827 } else {
828 cpu->queued_work_last->next = &wi;
830 cpu->queued_work_last = &wi;
831 wi.next = NULL;
832 wi.done = false;
834 qemu_cpu_kick(cpu);
835 while (!wi.done) {
836 CPUState *self_cpu = current_cpu;
838 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
839 current_cpu = self_cpu;
843 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
845 struct qemu_work_item *wi;
847 if (qemu_cpu_is_self(cpu)) {
848 func(data);
849 return;
852 wi = g_malloc0(sizeof(struct qemu_work_item));
853 wi->func = func;
854 wi->data = data;
855 wi->free = true;
856 if (cpu->queued_work_first == NULL) {
857 cpu->queued_work_first = wi;
858 } else {
859 cpu->queued_work_last->next = wi;
861 cpu->queued_work_last = wi;
862 wi->next = NULL;
863 wi->done = false;
865 qemu_cpu_kick(cpu);
868 static void flush_queued_work(CPUState *cpu)
870 struct qemu_work_item *wi;
872 if (cpu->queued_work_first == NULL) {
873 return;
876 while ((wi = cpu->queued_work_first)) {
877 cpu->queued_work_first = wi->next;
878 wi->func(wi->data);
879 wi->done = true;
880 if (wi->free) {
881 g_free(wi);
884 cpu->queued_work_last = NULL;
885 qemu_cond_broadcast(&qemu_work_cond);
888 static void qemu_wait_io_event_common(CPUState *cpu)
890 if (cpu->stop) {
891 cpu->stop = false;
892 cpu->stopped = true;
893 qemu_cond_signal(&qemu_pause_cond);
895 flush_queued_work(cpu);
896 cpu->thread_kicked = false;
899 static void qemu_tcg_wait_io_event(void)
901 CPUState *cpu;
903 while (all_cpu_threads_idle()) {
904 /* Start accounting real time to the virtual clock if the CPUs
905 are idle. */
906 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
907 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
910 while (iothread_requesting_mutex) {
911 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
914 CPU_FOREACH(cpu) {
915 qemu_wait_io_event_common(cpu);
919 static void qemu_kvm_wait_io_event(CPUState *cpu)
921 while (cpu_thread_is_idle(cpu)) {
922 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
925 qemu_kvm_eat_signals(cpu);
926 qemu_wait_io_event_common(cpu);
929 static void *qemu_kvm_cpu_thread_fn(void *arg)
931 CPUState *cpu = arg;
932 int r;
934 qemu_mutex_lock(&qemu_global_mutex);
935 qemu_thread_get_self(cpu->thread);
936 cpu->thread_id = qemu_get_thread_id();
937 current_cpu = cpu;
939 r = kvm_init_vcpu(cpu);
940 if (r < 0) {
941 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
942 exit(1);
945 qemu_kvm_init_cpu_signals(cpu);
947 /* signal CPU creation */
948 cpu->created = true;
949 qemu_cond_signal(&qemu_cpu_cond);
951 while (1) {
952 if (cpu_can_run(cpu)) {
953 r = kvm_cpu_exec(cpu);
954 if (r == EXCP_DEBUG) {
955 cpu_handle_guest_debug(cpu);
958 qemu_kvm_wait_io_event(cpu);
961 return NULL;
964 static void *qemu_dummy_cpu_thread_fn(void *arg)
966 #ifdef _WIN32
967 fprintf(stderr, "qtest is not supported under Windows\n");
968 exit(1);
969 #else
970 CPUState *cpu = arg;
971 sigset_t waitset;
972 int r;
974 qemu_mutex_lock_iothread();
975 qemu_thread_get_self(cpu->thread);
976 cpu->thread_id = qemu_get_thread_id();
978 sigemptyset(&waitset);
979 sigaddset(&waitset, SIG_IPI);
981 /* signal CPU creation */
982 cpu->created = true;
983 qemu_cond_signal(&qemu_cpu_cond);
985 current_cpu = cpu;
986 while (1) {
987 current_cpu = NULL;
988 qemu_mutex_unlock_iothread();
989 do {
990 int sig;
991 r = sigwait(&waitset, &sig);
992 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
993 if (r == -1) {
994 perror("sigwait");
995 exit(1);
997 qemu_mutex_lock_iothread();
998 current_cpu = cpu;
999 qemu_wait_io_event_common(cpu);
1002 return NULL;
1003 #endif
1006 static void tcg_exec_all(void);
1008 static void *qemu_tcg_cpu_thread_fn(void *arg)
1010 CPUState *cpu = arg;
1012 qemu_tcg_init_cpu_signals();
1013 qemu_thread_get_self(cpu->thread);
1015 qemu_mutex_lock(&qemu_global_mutex);
1016 CPU_FOREACH(cpu) {
1017 cpu->thread_id = qemu_get_thread_id();
1018 cpu->created = true;
1020 qemu_cond_signal(&qemu_cpu_cond);
1022 /* wait for initial kick-off after machine start */
1023 while (QTAILQ_FIRST(&cpus)->stopped) {
1024 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
1026 /* process any pending work */
1027 CPU_FOREACH(cpu) {
1028 qemu_wait_io_event_common(cpu);
1032 while (1) {
1033 tcg_exec_all();
1035 if (use_icount) {
1036 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1038 if (deadline == 0) {
1039 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1042 qemu_tcg_wait_io_event();
1045 return NULL;
1048 static void qemu_cpu_kick_thread(CPUState *cpu)
1050 #ifndef _WIN32
1051 int err;
1053 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1054 if (err) {
1055 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1056 exit(1);
1058 #else /* _WIN32 */
1059 if (!qemu_cpu_is_self(cpu)) {
1060 CONTEXT tcgContext;
1062 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
1063 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
1064 GetLastError());
1065 exit(1);
1068 /* On multi-core systems, we are not sure that the thread is actually
1069 * suspended until we can get the context.
1071 tcgContext.ContextFlags = CONTEXT_CONTROL;
1072 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1073 continue;
1076 cpu_signal(0);
1078 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
1079 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
1080 GetLastError());
1081 exit(1);
1084 #endif
1087 void qemu_cpu_kick(CPUState *cpu)
1089 qemu_cond_broadcast(cpu->halt_cond);
1090 if (!tcg_enabled() && !cpu->thread_kicked) {
1091 qemu_cpu_kick_thread(cpu);
1092 cpu->thread_kicked = true;
1096 void qemu_cpu_kick_self(void)
1098 #ifndef _WIN32
1099 assert(current_cpu);
1101 if (!current_cpu->thread_kicked) {
1102 qemu_cpu_kick_thread(current_cpu);
1103 current_cpu->thread_kicked = true;
1105 #else
1106 abort();
1107 #endif
1110 bool qemu_cpu_is_self(CPUState *cpu)
1112 return qemu_thread_is_self(cpu->thread);
1115 static bool qemu_in_vcpu_thread(void)
1117 return current_cpu && qemu_cpu_is_self(current_cpu);
1120 void qemu_mutex_lock_iothread(void)
1122 if (!tcg_enabled()) {
1123 qemu_mutex_lock(&qemu_global_mutex);
1124 } else {
1125 iothread_requesting_mutex = true;
1126 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1127 qemu_cpu_kick_thread(first_cpu);
1128 qemu_mutex_lock(&qemu_global_mutex);
1130 iothread_requesting_mutex = false;
1131 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1135 void qemu_mutex_unlock_iothread(void)
1137 qemu_mutex_unlock(&qemu_global_mutex);
1140 static int all_vcpus_paused(void)
1142 CPUState *cpu;
1144 CPU_FOREACH(cpu) {
1145 if (!cpu->stopped) {
1146 return 0;
1150 return 1;
1153 void pause_all_vcpus(void)
1155 CPUState *cpu;
1157 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1158 CPU_FOREACH(cpu) {
1159 cpu->stop = true;
1160 qemu_cpu_kick(cpu);
1163 if (qemu_in_vcpu_thread()) {
1164 cpu_stop_current();
1165 if (!kvm_enabled()) {
1166 CPU_FOREACH(cpu) {
1167 cpu->stop = false;
1168 cpu->stopped = true;
1170 return;
1174 while (!all_vcpus_paused()) {
1175 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1176 CPU_FOREACH(cpu) {
1177 qemu_cpu_kick(cpu);
1182 void cpu_resume(CPUState *cpu)
1184 cpu->stop = false;
1185 cpu->stopped = false;
1186 qemu_cpu_kick(cpu);
1189 void resume_all_vcpus(void)
1191 CPUState *cpu;
1193 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1194 CPU_FOREACH(cpu) {
1195 cpu_resume(cpu);
1199 /* For temporary buffers for forming a name */
1200 #define VCPU_THREAD_NAME_SIZE 16
1202 static void qemu_tcg_init_vcpu(CPUState *cpu)
1204 char thread_name[VCPU_THREAD_NAME_SIZE];
1206 tcg_cpu_address_space_init(cpu, cpu->as);
1208 /* share a single thread for all cpus with TCG */
1209 if (!tcg_cpu_thread) {
1210 cpu->thread = g_malloc0(sizeof(QemuThread));
1211 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1212 qemu_cond_init(cpu->halt_cond);
1213 tcg_halt_cond = cpu->halt_cond;
1214 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1215 cpu->cpu_index);
1216 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1217 cpu, QEMU_THREAD_JOINABLE);
1218 #ifdef _WIN32
1219 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1220 #endif
1221 while (!cpu->created) {
1222 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1224 tcg_cpu_thread = cpu->thread;
1225 } else {
1226 cpu->thread = tcg_cpu_thread;
1227 cpu->halt_cond = tcg_halt_cond;
1231 static void qemu_kvm_start_vcpu(CPUState *cpu)
1233 char thread_name[VCPU_THREAD_NAME_SIZE];
1235 cpu->thread = g_malloc0(sizeof(QemuThread));
1236 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1237 qemu_cond_init(cpu->halt_cond);
1238 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1239 cpu->cpu_index);
1240 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1241 cpu, QEMU_THREAD_JOINABLE);
1242 while (!cpu->created) {
1243 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1247 static void qemu_dummy_start_vcpu(CPUState *cpu)
1249 char thread_name[VCPU_THREAD_NAME_SIZE];
1251 cpu->thread = g_malloc0(sizeof(QemuThread));
1252 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1253 qemu_cond_init(cpu->halt_cond);
1254 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1255 cpu->cpu_index);
1256 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1257 QEMU_THREAD_JOINABLE);
1258 while (!cpu->created) {
1259 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1263 void qemu_init_vcpu(CPUState *cpu)
1265 cpu->nr_cores = smp_cores;
1266 cpu->nr_threads = smp_threads;
1267 cpu->stopped = true;
1268 if (kvm_enabled()) {
1269 qemu_kvm_start_vcpu(cpu);
1270 } else if (tcg_enabled()) {
1271 qemu_tcg_init_vcpu(cpu);
1272 } else {
1273 qemu_dummy_start_vcpu(cpu);
1277 void cpu_stop_current(void)
1279 if (current_cpu) {
1280 current_cpu->stop = false;
1281 current_cpu->stopped = true;
1282 cpu_exit(current_cpu);
1283 qemu_cond_signal(&qemu_pause_cond);
1287 int vm_stop(RunState state)
1289 if (qemu_in_vcpu_thread()) {
1290 qemu_system_vmstop_request_prepare();
1291 qemu_system_vmstop_request(state);
1293 * FIXME: should not return to device code in case
1294 * vm_stop() has been requested.
1296 cpu_stop_current();
1297 return 0;
1300 return do_vm_stop(state);
1303 /* does a state transition even if the VM is already stopped,
1304 current state is forgotten forever */
1305 int vm_stop_force_state(RunState state)
1307 if (runstate_is_running()) {
1308 return vm_stop(state);
1309 } else {
1310 runstate_set(state);
1311 /* Make sure to return an error if the flush in a previous vm_stop()
1312 * failed. */
1313 return bdrv_flush_all();
1317 static int tcg_cpu_exec(CPUArchState *env)
1319 CPUState *cpu = ENV_GET_CPU(env);
1320 int ret;
1321 #ifdef CONFIG_PROFILER
1322 int64_t ti;
1323 #endif
1325 #ifdef CONFIG_PROFILER
1326 ti = profile_getclock();
1327 #endif
1328 if (use_icount) {
1329 int64_t count;
1330 int64_t deadline;
1331 int decr;
1332 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1333 + cpu->icount_extra);
1334 cpu->icount_decr.u16.low = 0;
1335 cpu->icount_extra = 0;
1336 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1338 /* Maintain prior (possibly buggy) behaviour where if no deadline
1339 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1340 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1341 * nanoseconds.
1343 if ((deadline < 0) || (deadline > INT32_MAX)) {
1344 deadline = INT32_MAX;
1347 count = qemu_icount_round(deadline);
1348 timers_state.qemu_icount += count;
1349 decr = (count > 0xffff) ? 0xffff : count;
1350 count -= decr;
1351 cpu->icount_decr.u16.low = decr;
1352 cpu->icount_extra = count;
1354 ret = cpu_exec(env);
1355 #ifdef CONFIG_PROFILER
1356 qemu_time += profile_getclock() - ti;
1357 #endif
1358 if (use_icount) {
1359 /* Fold pending instructions back into the
1360 instruction counter, and clear the interrupt flag. */
1361 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1362 + cpu->icount_extra);
1363 cpu->icount_decr.u32 = 0;
1364 cpu->icount_extra = 0;
1366 return ret;
1369 static void tcg_exec_all(void)
1371 int r;
1373 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1374 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
1376 if (next_cpu == NULL) {
1377 next_cpu = first_cpu;
1379 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1380 CPUState *cpu = next_cpu;
1381 CPUArchState *env = cpu->env_ptr;
1383 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1384 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1386 if (cpu_can_run(cpu)) {
1387 r = tcg_cpu_exec(env);
1388 if (r == EXCP_DEBUG) {
1389 cpu_handle_guest_debug(cpu);
1390 break;
1392 } else if (cpu->stop || cpu->stopped) {
1393 break;
1396 exit_request = 0;
1399 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1401 /* XXX: implement xxx_cpu_list for targets that still miss it */
1402 #if defined(cpu_list)
1403 cpu_list(f, cpu_fprintf);
1404 #endif
1407 CpuInfoList *qmp_query_cpus(Error **errp)
1409 CpuInfoList *head = NULL, *cur_item = NULL;
1410 CPUState *cpu;
1412 CPU_FOREACH(cpu) {
1413 CpuInfoList *info;
1414 #if defined(TARGET_I386)
1415 X86CPU *x86_cpu = X86_CPU(cpu);
1416 CPUX86State *env = &x86_cpu->env;
1417 #elif defined(TARGET_PPC)
1418 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1419 CPUPPCState *env = &ppc_cpu->env;
1420 #elif defined(TARGET_SPARC)
1421 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1422 CPUSPARCState *env = &sparc_cpu->env;
1423 #elif defined(TARGET_MIPS)
1424 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1425 CPUMIPSState *env = &mips_cpu->env;
1426 #elif defined(TARGET_TRICORE)
1427 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1428 CPUTriCoreState *env = &tricore_cpu->env;
1429 #endif
1431 cpu_synchronize_state(cpu);
1433 info = g_malloc0(sizeof(*info));
1434 info->value = g_malloc0(sizeof(*info->value));
1435 info->value->CPU = cpu->cpu_index;
1436 info->value->current = (cpu == first_cpu);
1437 info->value->halted = cpu->halted;
1438 info->value->thread_id = cpu->thread_id;
1439 #if defined(TARGET_I386)
1440 info->value->has_pc = true;
1441 info->value->pc = env->eip + env->segs[R_CS].base;
1442 #elif defined(TARGET_PPC)
1443 info->value->has_nip = true;
1444 info->value->nip = env->nip;
1445 #elif defined(TARGET_SPARC)
1446 info->value->has_pc = true;
1447 info->value->pc = env->pc;
1448 info->value->has_npc = true;
1449 info->value->npc = env->npc;
1450 #elif defined(TARGET_MIPS)
1451 info->value->has_PC = true;
1452 info->value->PC = env->active_tc.PC;
1453 #elif defined(TARGET_TRICORE)
1454 info->value->has_PC = true;
1455 info->value->PC = env->PC;
1456 #endif
1458 /* XXX: waiting for the qapi to support GSList */
1459 if (!cur_item) {
1460 head = cur_item = info;
1461 } else {
1462 cur_item->next = info;
1463 cur_item = info;
1467 return head;
1470 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1471 bool has_cpu, int64_t cpu_index, Error **errp)
1473 FILE *f;
1474 uint32_t l;
1475 CPUState *cpu;
1476 uint8_t buf[1024];
1478 if (!has_cpu) {
1479 cpu_index = 0;
1482 cpu = qemu_get_cpu(cpu_index);
1483 if (cpu == NULL) {
1484 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1485 "a CPU number");
1486 return;
1489 f = fopen(filename, "wb");
1490 if (!f) {
1491 error_setg_file_open(errp, errno, filename);
1492 return;
1495 while (size != 0) {
1496 l = sizeof(buf);
1497 if (l > size)
1498 l = size;
1499 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1500 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1501 goto exit;
1503 if (fwrite(buf, 1, l, f) != l) {
1504 error_set(errp, QERR_IO_ERROR);
1505 goto exit;
1507 addr += l;
1508 size -= l;
1511 exit:
1512 fclose(f);
1515 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1516 Error **errp)
1518 FILE *f;
1519 uint32_t l;
1520 uint8_t buf[1024];
1522 f = fopen(filename, "wb");
1523 if (!f) {
1524 error_setg_file_open(errp, errno, filename);
1525 return;
1528 while (size != 0) {
1529 l = sizeof(buf);
1530 if (l > size)
1531 l = size;
1532 cpu_physical_memory_read(addr, buf, l);
1533 if (fwrite(buf, 1, l, f) != l) {
1534 error_set(errp, QERR_IO_ERROR);
1535 goto exit;
1537 addr += l;
1538 size -= l;
1541 exit:
1542 fclose(f);
1545 void qmp_inject_nmi(Error **errp)
1547 #if defined(TARGET_I386)
1548 CPUState *cs;
1550 CPU_FOREACH(cs) {
1551 X86CPU *cpu = X86_CPU(cs);
1553 if (!cpu->apic_state) {
1554 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1555 } else {
1556 apic_deliver_nmi(cpu->apic_state);
1559 #else
1560 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1561 #endif
1564 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1566 if (!use_icount) {
1567 return;
1570 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1571 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1572 if (icount_align_option) {
1573 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1574 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1575 } else {
1576 cpu_fprintf(f, "Max guest delay NA\n");
1577 cpu_fprintf(f, "Max guest advance NA\n");