4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
34 #include "qemu-thread.h"
/* SIG_IPI is the signal qemu_cpu_kick_thread() sends to a CPU thread with
 * pthread_kill().  Two alternative values are listed; the preprocessor
 * conditional that picks one of them is not visible in this excerpt. */
42 #define SIG_IPI (SIGRTMIN+4)
44 #define SIG_IPI SIGUSR1
49 #include <sys/prctl.h>
/* Values for the prctl() constants that qemu_init_sigbus() passes.
 * PR_MCE_KILL_SET and PR_MCE_KILL_EARLY are only defined here when they are
 * not already defined (see the #ifndef guards); the guard around PR_MCE_KILL
 * itself is not visible in this excerpt. */
52 #define PR_MCE_KILL 33
55 #ifndef PR_MCE_KILL_SET
56 #define PR_MCE_KILL_SET 1
59 #ifndef PR_MCE_KILL_EARLY
60 #define PR_MCE_KILL_EARLY 1
63 #endif /* CONFIG_LINUX */
/* Cursor into the first_cpu list used by cpu_exec_all(): it is reset to
 * first_cpu when it is NULL and advanced through ->next_cpu as the loop
 * there proceeds. */
65 static CPUState
*next_cpu
;
67 /***********************************************************/
/*
 * Report a hardware error on stderr.
 *
 * Prints "qemu: hardware error: " followed by the printf-style message built
 * from @fmt and its variadic arguments, then walks the first_cpu list and,
 * for every CPU, prints its cpu_index and dumps its state with
 * cpu_dump_state().  Two cpu_dump_state() calls are listed (flags
 * X86_DUMP_FPU and 0); the conditional that chooses between them, the
 * va_list setup and the end of the function are not visible in this excerpt.
 */
68 void hw_error(const char *fmt
, ...)
74 fprintf(stderr
, "qemu: hardware error: ");
75 vfprintf(stderr
, fmt
, ap
);
76 fprintf(stderr
, "\n");
77 for(env
= first_cpu
; env
!= NULL
; env
= env
->next_cpu
) {
78 fprintf(stderr
, "CPU #%d:\n", env
->cpu_index
);
80 cpu_dump_state(env
, stderr
, fprintf
, X86_DUMP_FPU
);
82 cpu_dump_state(env
, stderr
, fprintf
, 0);
89 void cpu_synchronize_all_states(void)
93 for (cpu
= first_cpu
; cpu
; cpu
= cpu
->next_cpu
) {
94 cpu_synchronize_state(cpu
);
98 void cpu_synchronize_all_post_reset(void)
102 for (cpu
= first_cpu
; cpu
; cpu
= cpu
->next_cpu
) {
103 cpu_synchronize_post_reset(cpu
);
107 void cpu_synchronize_all_post_init(void)
111 for (cpu
= first_cpu
; cpu
; cpu
= cpu
->next_cpu
) {
112 cpu_synchronize_post_init(cpu
);
116 int cpu_is_stopped(CPUState
*env
)
118 return !runstate_is_running() || env
->stopped
;
/*
 * Stop-side bookkeeping for a VM state change to @state.
 *
 * The visible part is guarded by runstate_is_running() and shows two steps:
 * vm_state_notify(0, state) and a QEVENT_STOP monitor event.  The other
 * statements of the function are on lines missing from this excerpt.
 */
121 static void do_vm_stop(RunState state
)
123 if (runstate_is_running()) {
127 vm_state_notify(0, state
);
130 monitor_protocol_event(QEVENT_STOP
, NULL
);
/*
 * Decide whether @env may execute guest code.
 *
 * The only test visible here is "env->stopped is set or the VM is not
 * running".  The values returned, and any earlier test, are on lines missing
 * from this excerpt.  NOTE(review): presumably returns 0 when that test is
 * true - confirm against the full source.
 */
134 static int cpu_can_run(CPUState
*env
)
139 if (env
->stopped
|| !runstate_is_running()) {
/*
 * Decide whether the thread running @env has nothing to do.
 *
 * Three tests are visible, in this order:
 *   1. env->stop is set or work is queued (env->queued_work_first);
 *   2. env->stopped is set or the VM is not running;
 *   3. the CPU is not halted, or qemu_cpu_has_work(env) is true, or KVM is
 *      enabled with the irqchip in the kernel.
 * The value returned for each test is on lines missing from this excerpt.
 */
145 static bool cpu_thread_is_idle(CPUState
*env
)
147 if (env
->stop
|| env
->queued_work_first
) {
150 if (env
->stopped
|| !runstate_is_running()) {
153 if (!env
->halted
|| qemu_cpu_has_work(env
) ||
154 (kvm_enabled() && kvm_irqchip_in_kernel())) {
/*
 * Check cpu_thread_is_idle() for the CPUs on the first_cpu list.
 *
 * The loop and the "not idle" test are visible; the return statements are on
 * lines missing from this excerpt.  NOTE(review): presumably returns false as
 * soon as one CPU is not idle and true otherwise - confirm.
 */
160 bool all_cpu_threads_idle(void)
164 for (env
= first_cpu
; env
!= NULL
; env
= env
->next_cpu
) {
165 if (!cpu_thread_is_idle(env
)) {
/*
 * Handle a debug exit (callers invoke this when execution returned
 * EXCP_DEBUG): passes @env to gdb_set_stop_cpu() and then calls
 * qemu_system_debug_request().  One further line of the body is missing
 * from this excerpt.
 */
172 static void cpu_handle_guest_debug(CPUState
*env
)
174 gdb_set_stop_cpu(env
);
175 qemu_system_debug_request();
/*
 * Signal handler that qemu_tcg_init_cpu_signals() installs for SIG_IPI.
 *
 * @sig is not used in the visible part.  When cpu_single_env is set,
 * cpu_exit() is called on it.  The tail of the body is on lines missing
 * from this excerpt.
 */
179 static void cpu_signal(int sig
)
181 if (cpu_single_env
) {
182 cpu_exit(cpu_single_env
);
/*
 * Restore the default SIGBUS disposition so the signal can take its default
 * action.
 *
 * Visible steps: a zeroed struct sigaction with sa_handler = SIG_DFL is
 * installed for SIGBUS; when sigaction() succeeds (returns 0), SIGBUS is
 * added to a signal set that is unblocked with sigprocmask(SIG_UNBLOCK).
 * A perror() call reports the failure case.  The declaration and
 * initialisation of `set` and the remaining statements are on lines missing
 * from this excerpt.
 *
 * NOTE(review): the perror() string ends in "\n"; perror() appends
 * ": <error text>" after its argument, so the error text ends up on the
 * following line.
 */
188 static void sigbus_reraise(void)
191 struct sigaction action
;
193 memset(&action
, 0, sizeof(action
));
194 action
.sa_handler
= SIG_DFL
;
195 if (!sigaction(SIGBUS
, &action
, NULL
)) {
198 sigaddset(&set
, SIGBUS
);
199 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
201 perror("Failed to re-raise SIGBUS!\n");
/*
 * SIGBUS handler installed by qemu_init_sigbus() (through a cast to the
 * sa_sigaction signature).
 *
 * @n:       signal number; not used in the visible part.
 * @siginfo: signal details in struct qemu_signalfd_siginfo layout;
 *           sigfd_handler() passes its signalfd record here.
 * The rest of the parameter list is on a line missing from this excerpt.
 *
 * Forwards ssi_code and ssi_addr (converted to a pointer) to
 * kvm_on_sigbus(); what happens when that call returns nonzero is not
 * visible here.
 */
205 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
208 if (kvm_on_sigbus(siginfo
->ssi_code
,
209 (void *)(intptr_t)siginfo
->ssi_addr
)) {
214 static void qemu_init_sigbus(void)
216 struct sigaction action
;
218 memset(&action
, 0, sizeof(action
));
219 action
.sa_flags
= SA_SIGINFO
;
220 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
221 sigaction(SIGBUS
, &action
, NULL
);
223 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
/*
 * Consume SIG_IPI and SIGBUS signals pending for the calling thread.
 *
 * Visible steps: a wait set holding SIG_IPI and SIGBUS is polled with
 * sigtimedwait() using a zero timeout; a failure other than EAGAIN/EINTR is
 * reported with perror("sigtimedwait"); si_code and si_addr of the received
 * siginfo are handed to kvm_on_sigbus_vcpu() for @env; sigpending() is then
 * queried and the do/while loop repeats while SIG_IPI or SIGBUS is still
 * pending.  Declarations, the dispatch on the received signal and the error
 * exits are on lines missing from this excerpt.
 */
226 static void qemu_kvm_eat_signals(CPUState
*env
)
228 struct timespec ts
= { 0, 0 };
234 sigemptyset(&waitset
);
235 sigaddset(&waitset
, SIG_IPI
);
236 sigaddset(&waitset
, SIGBUS
);
239 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
240 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
241 perror("sigtimedwait");
247 if (kvm_on_sigbus_vcpu(env
, siginfo
.si_code
, siginfo
.si_addr
)) {
255 r
= sigpending(&chkset
);
257 perror("sigpending");
260 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
263 #else /* !CONFIG_LINUX */
/* !CONFIG_LINUX variant of qemu_init_sigbus().  Only the signature is
 * visible in this excerpt. */
265 static void qemu_init_sigbus(void)
/* !CONFIG_LINUX variant of qemu_kvm_eat_signals().  Only the signature is
 * visible in this excerpt. */
269 static void qemu_kvm_eat_signals(CPUState
*env
)
272 #endif /* !CONFIG_LINUX */
/* Descriptor qemu_event_increment() writes to in order to notify the main
 * loop.  Stays -1 until qemu_event_init() stores fds[1] in it. */
275 static int io_thread_fd
= -1;
/*
 * Notify the main loop by writing an 8-byte value of 1 to io_thread_fd.
 *
 * The write is retried while it fails with EINTR.  A failure with EAGAIN is
 * accepted; any other failure is reported on stderr.  The case
 * io_thread_fd == -1 is tested first; what is done in that case, and what
 * follows the error message, is on lines missing from this excerpt.
 */
277 static void qemu_event_increment(void)
279 /* Write 8 bytes to be compatible with eventfd. */
280 static const uint64_t val
= 1;
283 if (io_thread_fd
== -1) {
287 ret
= write(io_thread_fd
, &val
, sizeof(val
));
288 } while (ret
< 0 && errno
== EINTR
);
290 /* EAGAIN is fine, a read must be pending. */
291 if (ret
< 0 && errno
!= EAGAIN
) {
292 fprintf(stderr
, "qemu_event_increment: write() failed: %s\n",
/*
 * Read handler registered by qemu_event_init() for the notification
 * descriptor.
 *
 * @opaque: the file descriptor, passed as an integer cast to a pointer.
 *
 * Reads into `buffer` repeatedly: the loop continues while read() fails with
 * EINTR or fills the whole buffer, i.e. until a short read or another error.
 * The declarations of `len` and `buffer` are on lines missing from this
 * excerpt.
 */
298 static void qemu_event_read(void *opaque
)
300 int fd
= (intptr_t)opaque
;
304 /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */
306 len
= read(fd
, buffer
, sizeof(buffer
));
307 } while ((len
== -1 && errno
== EINTR
) || len
== sizeof(buffer
));
/*
 * Set up the descriptor pair used to wake the main loop.
 *
 * Visible steps: qemu_eventfd(fds) fills the pair; both fds[0] and fds[1]
 * are switched to O_NONBLOCK with fcntl_setfl(); qemu_event_read() is
 * registered as the read handler of fds[0] (the fd itself is the opaque
 * argument); fds[1] is stored in io_thread_fd.  The checks of `err` and the
 * return statements are on lines missing from this excerpt.
 */
310 static int qemu_event_init(void)
315 err
= qemu_eventfd(fds
);
319 err
= fcntl_setfl(fds
[0], O_NONBLOCK
);
323 err
= fcntl_setfl(fds
[1], O_NONBLOCK
);
327 qemu_set_fd_handler2(fds
[0], NULL
, qemu_event_read
, NULL
,
328 (void *)(intptr_t)fds
[0]);
330 io_thread_fd
= fds
[1];
/* Signal handler that qemu_kvm_init_cpu_signals() installs for SIG_IPI.
 * Only the signature is visible in this excerpt. */
339 static void dummy_signal(int sig
)
/* If we have signalfd, we mask out the signals we want to handle and then
 * use signalfd to listen for them.  We rely on whatever the current signal
 * handler is to dispatch the signals when we receive them.
 */
/*
 * Read handler for the signalfd descriptor registered by qemu_signal_init().
 *
 * @opaque: the signalfd file descriptor, passed as an integer cast to a
 *          pointer.
 *
 * Visible steps: one struct qemu_signalfd_siginfo record is read (retrying
 * on EINTR); EAGAIN and short reads are tested for (a short read is reported
 * with printf); the disposition currently installed for info.ssi_signo is
 * fetched with sigaction() and invoked directly - through sa_sigaction, with
 * the record cast to siginfo_t * and a NULL context, when SA_SIGINFO is set
 * and sa_sigaction is non-NULL, otherwise through sa_handler when that is
 * non-NULL.  The enclosing loop and the exits taken after the EAGAIN and
 * short-read tests are on lines missing from this excerpt.
 */
347 static void sigfd_handler(void *opaque
)
349 int fd
= (intptr_t)opaque
;
350 struct qemu_signalfd_siginfo info
;
351 struct sigaction action
;
356 len
= read(fd
, &info
, sizeof(info
));
357 } while (len
== -1 && errno
== EINTR
);
359 if (len
== -1 && errno
== EAGAIN
) {
363 if (len
!= sizeof(info
)) {
364 printf("read from sigfd returned %zd: %m\n", len
);
368 sigaction(info
.ssi_signo
, NULL
, &action
);
369 if ((action
.sa_flags
& SA_SIGINFO
) && action
.sa_sigaction
) {
370 action
.sa_sigaction(info
.ssi_signo
,
371 (siginfo_t
*)&info
, NULL
);
372 } else if (action
.sa_handler
) {
373 action
.sa_handler(info
.ssi_signo
);
/*
 * Set up signal handling for the calling thread.
 *
 * Visible steps, in order: SIGUSR2 is unblocked; SIG_IPI is blocked; SIGIO,
 * SIGALRM and SIGBUS are added to the set and blocked; a signalfd is created
 * for the set with qemu_signalfd() (a failure message is printed on stderr);
 * the descriptor is made non-blocking and sigfd_handler() is registered as
 * its read handler.  The sigemptyset() calls that reset `set` between the
 * steps, the failure test and the return statements are on lines missing
 * from this excerpt, so the exact contents of `set` at each step cannot be
 * confirmed here.
 */
378 static int qemu_signal_init(void)
383 /* SIGUSR2 used by posix-aio-compat.c */
385 sigaddset(&set
, SIGUSR2
);
386 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
389 * SIG_IPI must be blocked in the main thread and must not be caught
390 * by sigwait() in the signal thread. Otherwise, the cpu thread will
391 * not catch it reliably.
394 sigaddset(&set
, SIG_IPI
);
395 pthread_sigmask(SIG_BLOCK
, &set
, NULL
);
398 sigaddset(&set
, SIGIO
);
399 sigaddset(&set
, SIGALRM
);
400 sigaddset(&set
, SIGBUS
);
401 pthread_sigmask(SIG_BLOCK
, &set
, NULL
);
403 sigfd
= qemu_signalfd(&set
);
405 fprintf(stderr
, "failed to create signalfd\n");
409 fcntl_setfl(sigfd
, O_NONBLOCK
);
411 qemu_set_fd_handler2(sigfd
, NULL
, sigfd_handler
, NULL
,
412 (void *)(intptr_t)sigfd
);
/*
 * Per-vCPU signal setup for KVM.
 *
 * Visible steps: dummy_signal() is installed as the SIG_IPI handler; the
 * calling thread's current signal mask is fetched with
 * pthread_sigmask(SIG_BLOCK, NULL, &set); SIG_IPI and SIGBUS are removed
 * from that copy and the result is passed to kvm_set_signal_mask() for @env.
 * A failure is reported on stderr using strerror(-r).
 *
 * NOTE(review): the sigdelset()/kvm_set_signal_mask()/fprintf sequence
 * appears twice with identical arguments; the second pass looks redundant -
 * confirm against the full source before removing it.
 */
417 static void qemu_kvm_init_cpu_signals(CPUState
*env
)
421 struct sigaction sigact
;
423 memset(&sigact
, 0, sizeof(sigact
));
424 sigact
.sa_handler
= dummy_signal
;
425 sigaction(SIG_IPI
, &sigact
, NULL
);
427 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
428 sigdelset(&set
, SIG_IPI
);
429 sigdelset(&set
, SIGBUS
);
430 r
= kvm_set_signal_mask(env
, &set
);
432 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
436 sigdelset(&set
, SIG_IPI
);
437 sigdelset(&set
, SIGBUS
);
438 r
= kvm_set_signal_mask(env
, &set
);
440 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
/*
 * Signal setup for the TCG CPU thread: installs cpu_signal() as the SIG_IPI
 * handler and unblocks SIG_IPI in the calling thread's mask.  The
 * declaration and initialisation of `set` are on lines missing from this
 * excerpt.
 */
445 static void qemu_tcg_init_cpu_signals(void)
448 struct sigaction sigact
;
450 memset(&sigact
, 0, sizeof(sigact
));
451 sigact
.sa_handler
= cpu_signal
;
452 sigaction(SIG_IPI
, &sigact
, NULL
);
455 sigaddset(&set
, SIG_IPI
);
456 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
/* Event object created by the CreateEvent()-based qemu_event_init() and
 * signalled with SetEvent() by the matching qemu_event_increment(). */
461 HANDLE qemu_event_handle
;
/* Callback registered for qemu_event_handle with qemu_add_wait_object().
 * Only the signature is visible in this excerpt. */
463 static void dummy_event_handler(void *opaque
)
/*
 * Variant of qemu_event_init() built on the Win32 event API.
 *
 * Creates qemu_event_handle with CreateEvent(NULL, FALSE, FALSE, NULL),
 * reports a failure on stderr together with GetLastError(), and registers
 * dummy_event_handler() for the handle with qemu_add_wait_object().  The
 * return statements are on lines missing from this excerpt.
 *
 * NOTE(review): GetLastError() returns a DWORD (unsigned), while the format
 * string uses %ld.
 */
467 static int qemu_event_init(void)
469 qemu_event_handle
= CreateEvent(NULL
, FALSE
, FALSE
, NULL
);
470 if (!qemu_event_handle
) {
471 fprintf(stderr
, "Failed CreateEvent: %ld\n", GetLastError());
474 qemu_add_wait_object(qemu_event_handle
, dummy_event_handler
, NULL
);
/*
 * Variant of qemu_event_increment() built on the Win32 event API: signals
 * qemu_event_handle with SetEvent() and reports a failure on stderr.  The
 * rest of the error path is on lines missing from this excerpt.
 */
478 static void qemu_event_increment(void)
480 if (!SetEvent(qemu_event_handle
)) {
481 fprintf(stderr
, "qemu_event_increment: SetEvent failed: %ld\n",
/* Second definition of qemu_signal_init(), listed next to the
 * CreateEvent()-based code (presumably the Win32 build - confirm).  Only
 * the signature is visible in this excerpt. */
487 static int qemu_signal_init(void)
/* Second definition of qemu_kvm_init_cpu_signals(), listed next to the
 * CreateEvent()-based code (presumably the Win32 build - confirm).  Only
 * the signature is visible in this excerpt. */
492 static void qemu_kvm_init_cpu_signals(CPUState
*env
)
/* Second definition of qemu_tcg_init_cpu_signals(), listed next to the
 * CreateEvent()-based code (presumably the Win32 build - confirm).  Only
 * the signature is visible in this excerpt. */
497 static void qemu_tcg_init_cpu_signals(void)
/*
 * Synchronisation state shared by the I/O thread and the CPU threads.
 *
 *  qemu_global_mutex          mutex every qemu_cond_wait() in this file is
 *                             paired with; taken by qemu_init_main_loop()
 *                             and qemu_mutex_lock_iothread().
 *  qemu_io_proceeded_cond     broadcast by qemu_mutex_lock_iothread() after
 *                             it clears iothread_requesting_mutex.
 *  iothread_requesting_mutex  set while qemu_mutex_lock_iothread() is trying
 *                             to take the mutex; qemu_tcg_wait_io_event()
 *                             waits for it to clear.
 *  io_thread                  thread that ran qemu_init_main_loop().
 *  tcg_cpu_thread,
 *  tcg_halt_cond              thread and halt condition shared by all CPUs
 *                             in qemu_tcg_init_vcpu().
 *  qemu_cpu_cond              signalled by a new CPU thread; the creator
 *                             waits on it until env->created is set.
 *  qemu_pause_cond            signalled when a CPU stops; pause_all_vcpus()
 *                             waits on it.
 *  qemu_work_cond             broadcast by flush_queued_work(); run_on_cpu()
 *                             waits on it.
 */
502 QemuMutex qemu_global_mutex
;
503 static QemuCond qemu_io_proceeded_cond
;
504 static bool iothread_requesting_mutex
;
506 static QemuThread io_thread
;
508 static QemuThread
*tcg_cpu_thread
;
509 static QemuCond
*tcg_halt_cond
;
512 static QemuCond qemu_cpu_cond
;
514 static QemuCond qemu_pause_cond
;
515 static QemuCond qemu_work_cond
;
/*
 * Initialise the main-loop infrastructure of this file.
 *
 * Visible steps, in order: qemu_signal_init(); qemu_event_init(); the four
 * condition variables and qemu_global_mutex are initialised; the mutex is
 * locked (it is still held at the last visible statement); the calling
 * thread is recorded in io_thread.  The checks of `ret` and the return
 * statements are on lines missing from this excerpt.
 */
517 int qemu_init_main_loop(void)
523 ret
= qemu_signal_init();
528 /* Note eventfd must be drained before signalfd handlers run */
529 ret
= qemu_event_init();
534 qemu_cond_init(&qemu_cpu_cond
);
535 qemu_cond_init(&qemu_pause_cond
);
536 qemu_cond_init(&qemu_work_cond
);
537 qemu_cond_init(&qemu_io_proceeded_cond
);
538 qemu_mutex_init(&qemu_global_mutex
);
539 qemu_mutex_lock(&qemu_global_mutex
);
541 qemu_thread_get_self(&io_thread
);
/* Public entry point; only the signature is visible in this excerpt. */
546 void qemu_main_loop_start(void)
/*
 * Have @func(@data) executed in the context of CPU @env.
 *
 * Visible steps: qemu_cpu_is_self(env) is tested first (the action for that
 * case is not visible); otherwise the work item `wi`, which lives on this
 * function's stack, is appended to env's queue (queued_work_first /
 * queued_work_last) and the caller waits on qemu_work_cond with
 * qemu_global_mutex.  cpu_single_env is saved before the wait and restored
 * after it.  Because a pointer to a stack object is queued, the function
 * must not return before the item has been processed; the loop condition
 * that ensures this, the initialisation of `wi` and the kick of the target
 * CPU are on lines missing from this excerpt.
 */
551 void run_on_cpu(CPUState
*env
, void (*func
)(void *data
), void *data
)
553 struct qemu_work_item wi
;
555 if (qemu_cpu_is_self(env
)) {
562 if (!env
->queued_work_first
) {
563 env
->queued_work_first
= &wi
;
565 env
->queued_work_last
->next
= &wi
;
567 env
->queued_work_last
= &wi
;
573 CPUState
*self_env
= cpu_single_env
;
575 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
576 cpu_single_env
= self_env
;
/*
 * Drain the work queue of @env.
 *
 * Visible steps: an empty queue (queued_work_first == NULL) is tested for
 * up front; otherwise items are popped from the head one at a time, then
 * queued_work_last is reset to NULL and qemu_work_cond is broadcast, which
 * wakes waiters in run_on_cpu().  What is done with each popped item is on
 * lines missing from this excerpt.
 */
580 static void flush_queued_work(CPUState
*env
)
582 struct qemu_work_item
*wi
;
584 if (!env
->queued_work_first
) {
588 while ((wi
= env
->queued_work_first
)) {
589 env
->queued_work_first
= wi
->next
;
593 env
->queued_work_last
= NULL
;
594 qemu_cond_broadcast(&qemu_work_cond
);
/*
 * Bookkeeping shared by the TCG and KVM wait paths for @env.
 *
 * Visible steps: qemu_pause_cond is signalled (the condition guarding this
 * is on lines missing from this excerpt), queued work is run through
 * flush_queued_work(), and env->thread_kicked is cleared so that the CPU can
 * be kicked again.
 */
597 static void qemu_wait_io_event_common(CPUState
*env
)
602 qemu_cond_signal(&qemu_pause_cond
);
604 flush_queued_work(env
);
605 env
->thread_kicked
= false;
608 static void qemu_tcg_wait_io_event(void)
612 while (all_cpu_threads_idle()) {
613 /* Start accounting real time to the virtual clock if the CPUs
615 qemu_clock_warp(vm_clock
);
616 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
619 while (iothread_requesting_mutex
) {
620 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
623 for (env
= first_cpu
; env
!= NULL
; env
= env
->next_cpu
) {
624 qemu_wait_io_event_common(env
);
628 static void qemu_kvm_wait_io_event(CPUState
*env
)
630 while (cpu_thread_is_idle(env
)) {
631 qemu_cond_wait(env
->halt_cond
, &qemu_global_mutex
);
634 qemu_kvm_eat_signals(env
);
635 qemu_wait_io_event_common(env
);
/*
 * Thread function passed to qemu_thread_create() by qemu_kvm_start_vcpu().
 *
 * @arg: the CPU the thread is created for (qemu_kvm_start_vcpu() passes its
 *       env); the assignment to the local `env` is not visible here.
 *
 * Visible steps: takes qemu_global_mutex; records the thread handle and
 * thread id in env; calls kvm_init_vcpu() and reports a failure on stderr;
 * sets up signals with qemu_kvm_init_cpu_signals(); signals qemu_cpu_cond to
 * announce creation; then, per iteration, runs kvm_cpu_exec() when
 * cpu_can_run(env), hands EXCP_DEBUG to cpu_handle_guest_debug(), and waits
 * in qemu_kvm_wait_io_event().  The loop construct, the setting of
 * env->created and the return value are on lines missing from this excerpt.
 */
638 static void *qemu_kvm_cpu_thread_fn(void *arg
)
643 qemu_mutex_lock(&qemu_global_mutex
);
644 qemu_thread_get_self(env
->thread
);
645 env
->thread_id
= qemu_get_thread_id();
647 r
= kvm_init_vcpu(env
);
649 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
653 qemu_kvm_init_cpu_signals(env
);
655 /* signal CPU creation */
657 qemu_cond_signal(&qemu_cpu_cond
);
660 if (cpu_can_run(env
)) {
661 r
= kvm_cpu_exec(env
);
662 if (r
== EXCP_DEBUG
) {
663 cpu_handle_guest_debug(env
);
666 qemu_kvm_wait_io_event(env
);
/*
 * Thread function passed to qemu_thread_create() by qemu_tcg_init_vcpu();
 * one such thread serves all CPUs.
 *
 * @arg: the CPU the thread was created for; the assignment to the local
 *       `env` is not visible here.
 *
 * Visible steps: sets up signals with qemu_tcg_init_cpu_signals(); records
 * the thread handle; takes qemu_global_mutex; stores the thread id in every
 * CPU of the first_cpu list; signals qemu_cpu_cond to announce creation;
 * waits on tcg_halt_cond while first_cpu->stopped is set; then, in a loop
 * whose construct is not visible, tests the icount deadline when use_icount
 * is set and calls qemu_tcg_wait_io_event().  The remaining statements are
 * on lines missing from this excerpt.
 */
672 static void *qemu_tcg_cpu_thread_fn(void *arg
)
676 qemu_tcg_init_cpu_signals();
677 qemu_thread_get_self(env
->thread
);
679 /* signal CPU creation */
680 qemu_mutex_lock(&qemu_global_mutex
);
681 for (env
= first_cpu
; env
!= NULL
; env
= env
->next_cpu
) {
682 env
->thread_id
= qemu_get_thread_id();
685 qemu_cond_signal(&qemu_cpu_cond
);
687 /* wait for initial kick-off after machine start */
688 while (first_cpu
->stopped
) {
689 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
694 if (use_icount
&& qemu_next_icount_deadline() <= 0) {
697 qemu_tcg_wait_io_event();
/*
 * Force the thread running @env out of whatever it is doing.
 *
 * Two implementations are listed; the preprocessor lines that separate them
 * are missing from this excerpt.  The first sends SIG_IPI to the thread with
 * pthread_kill() and reports a failure on stderr.  The second, used only
 * when the caller is not @env's own thread, calls SuspendThread() and later
 * ResumeThread() on the thread; what happens in between is not visible.
 *
 * NOTE(review): the error format string "qemu:%s: %s" has no trailing
 * newline.
 */
703 static void qemu_cpu_kick_thread(CPUState
*env
)
708 err
= pthread_kill(env
->thread
->thread
, SIG_IPI
);
710 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
714 if (!qemu_cpu_is_self(env
)) {
715 SuspendThread(env
->thread
->thread
);
717 ResumeThread(env
->thread
->thread
);
722 void qemu_cpu_kick(void *_env
)
724 CPUState
*env
= _env
;
726 qemu_cond_broadcast(env
->halt_cond
);
727 if (kvm_enabled() && !env
->thread_kicked
) {
728 qemu_cpu_kick_thread(env
);
729 env
->thread_kicked
= true;
/*
 * Kick the CPU the calling thread is currently running (cpu_single_env,
 * which must be non-NULL - asserted).  The kick is skipped when
 * thread_kicked is already set; otherwise qemu_cpu_kick_thread() is called
 * and the flag is set.  Some lines of the body (preprocessor conditionals
 * included) are missing from this excerpt.
 */
733 void qemu_cpu_kick_self(void)
736 assert(cpu_single_env
);
738 if (!cpu_single_env
->thread_kicked
) {
739 qemu_cpu_kick_thread(cpu_single_env
);
740 cpu_single_env
->thread_kicked
= true;
747 int qemu_cpu_is_self(void *_env
)
749 CPUState
*env
= _env
;
751 return qemu_thread_is_self(env
->thread
);
/*
 * Take qemu_global_mutex on behalf of the I/O thread.
 *
 * Two paths are visible; the condition selecting between them is on a line
 * missing from this excerpt.  One path simply locks the mutex.  The other
 * sets iothread_requesting_mutex, tries qemu_mutex_trylock() and, when that
 * returns nonzero, kicks the thread of first_cpu and then blocks in
 * qemu_mutex_lock(); afterwards it clears iothread_requesting_mutex and
 * broadcasts qemu_io_proceeded_cond, which releases waiters in
 * qemu_tcg_wait_io_event().
 */
754 void qemu_mutex_lock_iothread(void)
757 qemu_mutex_lock(&qemu_global_mutex
);
759 iothread_requesting_mutex
= true;
760 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
761 qemu_cpu_kick_thread(first_cpu
);
762 qemu_mutex_lock(&qemu_global_mutex
);
764 iothread_requesting_mutex
= false;
765 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
769 void qemu_mutex_unlock_iothread(void)
771 qemu_mutex_unlock(&qemu_global_mutex
);
/*
 * Walk the CPU list starting at first_cpu and test each CPU's `stopped`
 * flag.  The loop construct and the return statements are on lines missing
 * from this excerpt.  NOTE(review): presumably returns 0 on the first CPU
 * that is not stopped and 1 otherwise - confirm.
 */
774 static int all_vcpus_paused(void)
776 CPUState
*penv
= first_cpu
;
779 if (!penv
->stopped
) {
782 penv
= (CPUState
*)penv
->next_cpu
;
/*
 * Pause every CPU and wait until all of them report being stopped.
 *
 * Visible structure: a first walk over the CPU list starting at first_cpu,
 * then a loop that waits on qemu_pause_cond (paired with qemu_global_mutex)
 * until all_vcpus_paused() is true, with a second list walk inside it.  The
 * per-CPU actions of both walks are on lines missing from this excerpt.
 */
788 void pause_all_vcpus(void)
790 CPUState
*penv
= first_cpu
;
795 penv
= (CPUState
*)penv
->next_cpu
;
798 while (!all_vcpus_paused()) {
799 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
803 penv
= (CPUState
*)penv
->next_cpu
;
/*
 * Walk the CPU list starting at first_cpu.  The action applied to each CPU
 * is on lines missing from this excerpt.
 */
808 void resume_all_vcpus(void)
810 CPUState
*penv
= first_cpu
;
816 penv
= (CPUState
*)penv
->next_cpu
;
820 static void qemu_tcg_init_vcpu(void *_env
)
822 CPUState
*env
= _env
;
824 /* share a single thread for all cpus with TCG */
825 if (!tcg_cpu_thread
) {
826 env
->thread
= g_malloc0(sizeof(QemuThread
));
827 env
->halt_cond
= g_malloc0(sizeof(QemuCond
));
828 qemu_cond_init(env
->halt_cond
);
829 tcg_halt_cond
= env
->halt_cond
;
830 qemu_thread_create(env
->thread
, qemu_tcg_cpu_thread_fn
, env
);
831 while (env
->created
== 0) {
832 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
834 tcg_cpu_thread
= env
->thread
;
836 env
->thread
= tcg_cpu_thread
;
837 env
->halt_cond
= tcg_halt_cond
;
841 static void qemu_kvm_start_vcpu(CPUState
*env
)
843 env
->thread
= g_malloc0(sizeof(QemuThread
));
844 env
->halt_cond
= g_malloc0(sizeof(QemuCond
));
845 qemu_cond_init(env
->halt_cond
);
846 qemu_thread_create(env
->thread
, qemu_kvm_cpu_thread_fn
, env
);
847 while (env
->created
== 0) {
848 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
/*
 * Initialise the CPU passed in @_env (a CPUState pointer) and start its
 * execution thread.
 *
 * Copies smp_cores and smp_threads into env->nr_cores / env->nr_threads,
 * then calls either qemu_kvm_start_vcpu() or qemu_tcg_init_vcpu().  The
 * condition selecting between the two calls and one further statement are
 * on lines missing from this excerpt.
 */
852 void qemu_init_vcpu(void *_env
)
854 CPUState
*env
= _env
;
856 env
->nr_cores
= smp_cores
;
857 env
->nr_threads
= smp_threads
;
860 qemu_kvm_start_vcpu(env
);
862 qemu_tcg_init_vcpu(env
);
/* Public wrapper that wakes the main loop through qemu_event_increment(). */
void qemu_notify_event(void)
{
    qemu_event_increment();
}
871 void cpu_stop_current(void)
873 if (cpu_single_env
) {
874 cpu_single_env
->stop
= 0;
875 cpu_single_env
->stopped
= 1;
876 cpu_exit(cpu_single_env
);
877 qemu_cond_signal(&qemu_pause_cond
);
/*
 * Request that the VM be stopped with run state @state.
 *
 * When the caller is not the I/O thread (io_thread), the stop is only
 * requested through qemu_system_vmstop_request().  The statements that
 * follow that call, and the path taken on the I/O thread itself, are on
 * lines missing from this excerpt.
 */
881 void vm_stop(RunState state
)
883 if (!qemu_thread_is_self(&io_thread
)) {
884 qemu_system_vmstop_request(state
);
886 * FIXME: should not return to device code in case
887 * vm_stop() has been requested.
/*
 * Run guest code for @env under TCG, with instruction-count bookkeeping.
 *
 * Visible steps: under CONFIG_PROFILER the start time is taken.  Before
 * execution the instructions still budgeted (icount_decr.u16.low +
 * icount_extra) are subtracted from qemu_icount and both fields are
 * cleared; a new budget is derived from
 * qemu_icount_round(qemu_next_icount_deadline()) and added to qemu_icount;
 * `decr` takes at most 0xffff of it and goes into icount_decr.u16.low, and
 * `count` goes into icount_extra.  After execution the profiler time is
 * accumulated and the unused budget is folded back into qemu_icount, with
 * icount_decr.u32 and icount_extra cleared.  The conditions guarding these
 * sections, the execution call itself, one statement between the
 * computation of `decr` and its use, and the return statement are on lines
 * missing from this excerpt.
 */
895 static int tcg_cpu_exec(CPUState
*env
)
898 #ifdef CONFIG_PROFILER
902 #ifdef CONFIG_PROFILER
903 ti
= profile_getclock();
908 qemu_icount
-= (env
->icount_decr
.u16
.low
+ env
->icount_extra
);
909 env
->icount_decr
.u16
.low
= 0;
910 env
->icount_extra
= 0;
911 count
= qemu_icount_round(qemu_next_icount_deadline());
912 qemu_icount
+= count
;
913 decr
= (count
> 0xffff) ? 0xffff : count
;
915 env
->icount_decr
.u16
.low
= decr
;
916 env
->icount_extra
= count
;
919 #ifdef CONFIG_PROFILER
920 qemu_time
+= profile_getclock() - ti
;
923 /* Fold pending instructions back into the
924 instruction counter, and clear the interrupt flag. */
925 qemu_icount
-= (env
->icount_decr
.u16
.low
926 + env
->icount_extra
);
927 env
->icount_decr
.u32
= 0;
928 env
->icount_extra
= 0;
/*
 * Give CPUs a turn at executing, resuming where the previous call stopped.
 *
 * Visible steps: vm_clock is warped; next_cpu restarts from first_cpu when
 * it is NULL; the loop advances next_cpu until the list ends or
 * exit_request is set.  For each CPU, vm_clock is enabled unless
 * SSTEP_NOTIMER is set in singlestep_enabled; a runnable CPU
 * (cpu_can_run()) is executed through kvm_cpu_exec() followed by
 * qemu_kvm_eat_signals(), or through tcg_cpu_exec(), and EXCP_DEBUG is
 * handed to cpu_handle_guest_debug(); a CPU with `stop` or `stopped` set is
 * tested for separately.  Returns true when not all CPU threads are idle.
 * The condition selecting KVM or TCG and the statements that leave the loop
 * are on lines missing from this excerpt.
 */
933 bool cpu_exec_all(void)
937 /* Account partial waits to the vm_clock. */
938 qemu_clock_warp(vm_clock
);
940 if (next_cpu
== NULL
) {
941 next_cpu
= first_cpu
;
943 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= next_cpu
->next_cpu
) {
944 CPUState
*env
= next_cpu
;
946 qemu_clock_enable(vm_clock
,
947 (env
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
949 if (cpu_can_run(env
)) {
951 r
= kvm_cpu_exec(env
);
952 qemu_kvm_eat_signals(env
);
954 r
= tcg_cpu_exec(env
);
956 if (r
== EXCP_DEBUG
) {
957 cpu_handle_guest_debug(env
);
960 } else if (env
->stop
|| env
->stopped
) {
965 return !all_cpu_threads_idle();
/*
 * For every CPU on the first_cpu list, scan the nb_numa_nodes entries of
 * node_cpumask[] and test the bit numbered env->cpu_index in each.  The
 * action taken on a match is on a line missing from this excerpt.
 *
 * NOTE(review): the mask is built as `1 << env->cpu_index`, an int-width
 * shift; it cannot address CPU indexes at or beyond the width of int.  If
 * node_cpumask[] elements are wider than int, a wider constant is needed -
 * confirm the element type.
 */
968 void set_numa_modes(void)
973 for (env
= first_cpu
; env
!= NULL
; env
= env
->next_cpu
) {
974 for (i
= 0; i
< nb_numa_nodes
; i
++) {
975 if (node_cpumask
[i
] & (1 << env
->cpu_index
)) {
/*
 * Handle the log-items option string @optarg.
 *
 * Converts @optarg to a mask with cpu_str_to_log_mask().  The visible code
 * also prints the table of known log items (name and help text of each
 * cpu_log_items[] entry up to the terminating entry with mask 0); the
 * condition under which the table is printed and the use made of a valid
 * mask are on lines missing from this excerpt.
 */
982 void set_cpu_log(const char *optarg
)
985 const CPULogItem
*item
;
987 mask
= cpu_str_to_log_mask(optarg
);
989 printf("Log items (comma separated):\n");
990 for (item
= cpu_log_items
; item
->mask
!= 0; item
++) {
991 printf("%-10s %s\n", item
->name
, item
->help
);
/* Forward the log file name given in @optarg to cpu_set_log_filename(). */
void set_cpu_log_filename(const char *optarg)
{
    cpu_set_log_filename(optarg);
}
1003 /* Return the virtual CPU time, based on the instruction counter. */
/*
 * Starts from qemu_icount, subtracts the instructions still budgeted for the
 * current CPU (icount_decr.u16.low + icount_extra of cpu_single_env) and
 * returns qemu_icount_bias + (icount << icount_time_shift).  "Bad clock
 * read" is printed on stderr when can_do_io(env) is false.  The condition
 * guarding the per-CPU adjustment is on a line missing from this excerpt.
 *
 * NOTE(review): the declaration of `env` ends in a stray double semicolon.
 */
1004 int64_t cpu_get_icount(void)
1007 CPUState
*env
= cpu_single_env
;;
1009 icount
= qemu_icount
;
1011 if (!can_do_io(env
)) {
1012 fprintf(stderr
, "Bad clock read\n");
1014 icount
-= (env
->icount_decr
.u16
.low
+ env
->icount_extra
);
1016 return qemu_icount_bias
+ (icount
<< icount_time_shift
);
1019 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1021 /* XXX: implement xxx_cpu_list for targets that still miss it */
1022 #if defined(cpu_list_id)
1023 cpu_list_id(f
, cpu_fprintf
, optarg
);
1024 #elif defined(cpu_list)
1025 cpu_list(f
, cpu_fprintf
); /* deprecated */