4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
9 #include "config-host.h"
15 #include "qemu-common.h"
22 #include "qemu-thread.h"
27 #include <sys/utsname.h>
28 #include <sys/syscall.h>
30 #include <sys/ioctl.h>
32 #include <sys/prctl.h>
35 #define PR_MCE_KILL 33
39 #define BUS_MCEERR_AR 4
42 #define BUS_MCEERR_AO 5
45 #define EXPECTED_KVM_API_VERSION 12
47 #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
48 #error libkvm: userspace and kernel version mismatch
53 int kvm_pit_reinject
= 1;
58 kvm_context_t kvm_context
;
60 pthread_mutex_t qemu_mutex
= PTHREAD_MUTEX_INITIALIZER
;
61 pthread_cond_t qemu_vcpu_cond
= PTHREAD_COND_INITIALIZER
;
62 pthread_cond_t qemu_system_cond
= PTHREAD_COND_INITIALIZER
;
63 pthread_cond_t qemu_pause_cond
= PTHREAD_COND_INITIALIZER
;
64 pthread_cond_t qemu_work_cond
= PTHREAD_COND_INITIALIZER
;
65 __thread CPUState
*current_env
;
67 static int qemu_system_ready
;
69 #define SIG_IPI (SIGRTMIN+4)
71 CPUState
*kvm_debug_cpu_requested
;
73 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
74 /* The list of ioperm_data */
75 static QLIST_HEAD(, ioperm_data
) ioperm_head
;
78 #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
/* Report an exit reason that userspace does not know how to handle.
 * Always returns -EINVAL so the run loop terminates. */
static int handle_unhandled(uint64_t reason)
{
    fprintf(stderr, "kvm: unhandled exit %" PRIx64 "\n", reason);
    return -EINVAL;
}

#define VMX_INVALID_GUEST_STATE 0x80000021

/* Report a failed vmentry; explain the common invalid-guest-state case.
 * Always returns -EINVAL so the run loop terminates. */
static int handle_failed_vmentry(uint64_t reason)
{
    fprintf(stderr, "kvm: vm entry failed with error 0x%" PRIx64 "\n\n", reason);

    /* Perhaps we will need to check if this machine is intel since exit
       reason 0x21 has a different interpretation on SVM */
    if (reason == VMX_INVALID_GUEST_STATE) {
        fprintf(stderr, "If you're running a guest on an Intel machine without\n");
        fprintf(stderr, "unrestricted mode support, the failure can be most likely\n");
        fprintf(stderr, "due to the guest entering an invalid state for Intel VT.\n");
        fprintf(stderr, "For example, the guest maybe running in big real mode\n");
        fprintf(stderr, "which is not supported on less recent Intel processors.\n\n");
    }

    return -EINVAL;
}
105 static inline void set_gsi(kvm_context_t kvm
, unsigned int gsi
)
107 uint32_t *bitmap
= kvm
->used_gsi_bitmap
;
109 if (gsi
< kvm
->max_gsi
)
110 bitmap
[gsi
/ 32] |= 1U << (gsi
% 32);
112 DPRINTF("Invalid GSI %u\n", gsi
);
115 static inline void clear_gsi(kvm_context_t kvm
, unsigned int gsi
)
117 uint32_t *bitmap
= kvm
->used_gsi_bitmap
;
119 if (gsi
< kvm
->max_gsi
)
120 bitmap
[gsi
/ 32] &= ~(1U << (gsi
% 32));
122 DPRINTF("Invalid GSI %u\n", gsi
);
125 static int kvm_create_context(void);
133 fd
= open("/dev/kvm", O_RDWR
);
135 perror("open /dev/kvm");
138 r
= ioctl(fd
, KVM_GET_API_VERSION
, 0);
141 "kvm kernel version too old: "
142 "KVM_GET_API_VERSION ioctl not supported\n");
145 if (r
< EXPECTED_KVM_API_VERSION
) {
146 fprintf(stderr
, "kvm kernel version too old: "
147 "We expect API version %d or newer, but got "
148 "version %d\n", EXPECTED_KVM_API_VERSION
, r
);
151 if (r
> EXPECTED_KVM_API_VERSION
) {
152 fprintf(stderr
, "kvm userspace version too old\n");
155 kvm_state
= qemu_mallocz(sizeof(*kvm_state
));
156 kvm_context
= &kvm_state
->kvm_context
;
159 kvm_state
->vmfd
= -1;
161 #ifdef KVM_CAP_SET_GUEST_DEBUG
162 QTAILQ_INIT(&kvm_state
->kvm_sw_breakpoints
);
165 for (i
= 0; i
< ARRAY_SIZE(kvm_state
->slots
); i
++) {
166 kvm_state
->slots
[i
].slot
= i
;
169 #ifdef KVM_CAP_USER_MEMORY
170 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_USER_MEMORY
);
173 "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
177 #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
180 gsi_count
= kvm_get_gsi_count(kvm_context
);
184 /* Round up so we can search ints using ffs */
185 gsi_bits
= ALIGN(gsi_count
, 32);
186 kvm_context
->used_gsi_bitmap
= qemu_mallocz(gsi_bits
/ 8);
187 kvm_context
->max_gsi
= gsi_bits
;
189 /* Mark any over-allocated bits as already in use */
190 for (i
= gsi_count
; i
< gsi_bits
; i
++) {
191 set_gsi(kvm_context
, i
);
195 kvm_cpu_register_phys_memory_client();
197 pthread_mutex_lock(&qemu_mutex
);
198 return kvm_create_context();
205 static void kvm_finalize(KVMState
*s
)
208 if (kvm->vcpu_fd[0] != -1)
209 close(kvm->vcpu_fd[0]);
210 if (kvm->vm_fd != -1)
217 static int kvm_set_boot_vcpu_id(kvm_context_t kvm
, uint32_t id
)
219 #ifdef KVM_CAP_SET_BOOT_CPU_ID
220 int r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_SET_BOOT_CPU_ID
);
222 return kvm_vm_ioctl(kvm_state
, KVM_SET_BOOT_CPU_ID
, id
);
230 void kvm_create_irqchip(kvm_context_t kvm
)
234 #ifdef KVM_CAP_IRQCHIP
236 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_IRQCHIP
);
237 if (r
> 0) { /* kernel irqchip supported */
238 r
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_IRQCHIP
);
240 kvm
->irqchip_inject_ioctl
= KVM_IRQ_LINE
;
241 #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
242 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
,
243 KVM_CAP_IRQ_INJECT_STATUS
);
245 kvm
->irqchip_inject_ioctl
= KVM_IRQ_LINE_STATUS
;
248 kvm_state
->irqchip_in_kernel
= 1;
250 fprintf(stderr
, "Create kernel PIC irqchip failed\n");
#ifdef KVM_CAP_IRQCHIP

/* Assert or deassert @irq in the in-kernel irqchip.  Optionally reports the
 * injection status through *status (1 when the kernel cannot report it).
 * Returns 1, or 0 when no in-kernel irqchip is active. */
int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
{
    struct kvm_irq_level event;
    int r;

    if (!kvm_state->irqchip_in_kernel) {
        return 0;
    }
    event.level = level;
    event.irq = irq;
    r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event);
    if (r < 0) {
        perror("kvm_set_irq_level");
    }

    if (status) {
#ifdef KVM_CAP_IRQ_INJECT_STATUS
        *status =
            (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
#else
        *status = 1;
#endif
    }

    return 1;
}

/* Read irqchip state from the kernel into *chip.  Returns the ioctl result,
 * or 0 when no in-kernel irqchip is active. */
int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm_state->irqchip_in_kernel) {
        return 0;
    }
    r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
    if (r < 0) {
        perror("kvm_get_irqchip\n");
    }
    return r;
}

/* Write irqchip state from *chip into the kernel.  Returns the ioctl result,
 * or 0 when no in-kernel irqchip is active. */
int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm_state->irqchip_in_kernel) {
        return 0;
    }
    r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
    if (r < 0) {
        perror("kvm_set_irqchip\n");
    }
    return r;
}

#endif
315 static int handle_mmio(CPUState
*env
)
317 unsigned long addr
= env
->kvm_run
->mmio
.phys_addr
;
318 struct kvm_run
*kvm_run
= env
->kvm_run
;
319 void *data
= kvm_run
->mmio
.data
;
321 /* hack: Red Hat 7.1 generates these weird accesses. */
322 if ((addr
> 0xa0000 - 4 && addr
<= 0xa0000) && kvm_run
->mmio
.len
== 3) {
326 cpu_physical_memory_rw(addr
, data
, kvm_run
->mmio
.len
, kvm_run
->mmio
.is_write
);
330 int handle_io_window(kvm_context_t kvm
)
335 int handle_shutdown(kvm_context_t kvm
, CPUState
*env
)
337 /* stop the current vcpu from going back to guest mode */
340 qemu_system_reset_request();
344 static inline void push_nmi(kvm_context_t kvm
)
346 #ifdef KVM_CAP_USER_NMI
348 #endif /* KVM_CAP_USER_NMI */
351 void post_kvm_run(kvm_context_t kvm
, CPUState
*env
)
353 pthread_mutex_lock(&qemu_mutex
);
354 kvm_arch_post_run(env
, env
->kvm_run
);
355 cpu_single_env
= env
;
358 int pre_kvm_run(kvm_context_t kvm
, CPUState
*env
)
360 kvm_arch_pre_run(env
, env
->kvm_run
);
362 pthread_mutex_unlock(&qemu_mutex
);
366 int kvm_is_ready_for_interrupt_injection(CPUState
*env
)
368 return env
->kvm_run
->ready_for_interrupt_injection
;
371 int kvm_run(CPUState
*env
)
374 kvm_context_t kvm
= &env
->kvm_state
->kvm_context
;
375 struct kvm_run
*run
= env
->kvm_run
;
376 int fd
= env
->kvm_fd
;
379 if (env
->kvm_vcpu_dirty
) {
380 kvm_arch_put_registers(env
, KVM_PUT_RUNTIME_STATE
);
381 env
->kvm_vcpu_dirty
= 0;
384 #if !defined(__s390__)
385 if (!kvm_state
->irqchip_in_kernel
) {
386 run
->request_interrupt_window
= kvm_arch_try_push_interrupts(env
);
390 r
= pre_kvm_run(kvm
, env
);
394 if (env
->exit_request
) {
395 env
->exit_request
= 0;
396 pthread_kill(env
->thread
->thread
, SIG_IPI
);
398 r
= ioctl(fd
, KVM_RUN
, 0);
400 if (r
== -1 && errno
!= EINTR
&& errno
!= EAGAIN
) {
402 post_kvm_run(kvm
, env
);
403 fprintf(stderr
, "kvm_run: %s\n", strerror(-r
));
407 post_kvm_run(kvm
, env
);
409 kvm_flush_coalesced_mmio_buffer();
411 #if !defined(__s390__)
413 r
= handle_io_window(kvm
);
418 switch (run
->exit_reason
) {
419 case KVM_EXIT_UNKNOWN
:
420 r
= handle_unhandled(run
->hw
.hardware_exit_reason
);
422 case KVM_EXIT_FAIL_ENTRY
:
423 r
= handle_failed_vmentry(run
->fail_entry
.hardware_entry_failure_reason
);
425 case KVM_EXIT_EXCEPTION
:
426 fprintf(stderr
, "exception %d (%x)\n", run
->ex
.exception
,
428 cpu_dump_state(env
, stderr
, fprintf
, CPU_DUMP_CODE
);
432 kvm_handle_io(run
->io
.port
,
433 (uint8_t *)run
+ run
->io
.data_offset
,
440 r
= handle_mmio(env
);
443 r
= kvm_arch_halt(env
);
445 case KVM_EXIT_IRQ_WINDOW_OPEN
:
447 case KVM_EXIT_SHUTDOWN
:
448 r
= handle_shutdown(kvm
, env
);
450 #if defined(__s390__)
451 case KVM_EXIT_S390_SIEIC
:
452 r
= kvm_s390_handle_intercept(kvm
, env
, run
);
454 case KVM_EXIT_S390_RESET
:
455 r
= kvm_s390_handle_reset(kvm
, env
, run
);
458 case KVM_EXIT_INTERNAL_ERROR
:
459 r
= kvm_handle_internal_error(env
, run
);
462 r
= kvm_arch_run(env
);
464 fprintf(stderr
, "unhandled vm exit: 0x%x\n", run
->exit_reason
);
465 cpu_dump_state(env
, stderr
, fprintf
, CPU_DUMP_CODE
);
481 int kvm_inject_irq(CPUState
*env
, unsigned irq
)
483 struct kvm_interrupt intr
;
486 return kvm_vcpu_ioctl(env
, KVM_INTERRUPT
, &intr
);
489 int kvm_inject_nmi(CPUState
*env
)
491 #ifdef KVM_CAP_USER_NMI
492 return kvm_vcpu_ioctl(env
, KVM_NMI
);
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
/* Hand a host PCI device to the guest. */
int kvm_assign_pci_device(kvm_context_t kvm,
                          struct kvm_assigned_pci_dev *assigned_dev)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
}

/* Legacy IRQ assignment ioctl, used when KVM_CAP_ASSIGN_DEV_IRQ is absent. */
static int kvm_old_assign_irq(kvm_context_t kvm,
                              struct kvm_assigned_irq *assigned_irq)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
}

#ifdef KVM_CAP_ASSIGN_DEV_IRQ
/* Assign an IRQ to a passed-through device, preferring the new ioctl. */
int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
    if (ret > 0) {
        return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq);
    }

    return kvm_old_assign_irq(kvm, assigned_irq);
}

/* Remove an IRQ assignment from a passed-through device. */
int kvm_deassign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
{
    return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
}
#else
/* Kernel without KVM_CAP_ASSIGN_DEV_IRQ: only the legacy ioctl exists. */
int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
{
    return kvm_old_assign_irq(kvm, assigned_irq);
}
#endif
#endif

#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
/* Return a previously assigned PCI device to the host. */
int kvm_deassign_pci_device(kvm_context_t kvm,
                            struct kvm_assigned_pci_dev *assigned_dev)
{
    return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
}
#endif
544 int kvm_reinject_control(kvm_context_t kvm
, int pit_reinject
)
546 #ifdef KVM_CAP_REINJECT_CONTROL
548 struct kvm_reinject_control control
;
550 control
.pit_reinject
= pit_reinject
;
552 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_REINJECT_CONTROL
);
554 return kvm_vm_ioctl(kvm_state
, KVM_REINJECT_CONTROL
, &control
);
560 int kvm_has_gsi_routing(void)
564 #ifdef KVM_CAP_IRQ_ROUTING
565 r
= kvm_check_extension(kvm_state
, KVM_CAP_IRQ_ROUTING
);
570 int kvm_get_gsi_count(kvm_context_t kvm
)
572 #ifdef KVM_CAP_IRQ_ROUTING
573 return kvm_check_extension(kvm_state
, KVM_CAP_IRQ_ROUTING
);
579 int kvm_clear_gsi_routes(void)
581 #ifdef KVM_CAP_IRQ_ROUTING
582 kvm_context_t kvm
= kvm_context
;
584 kvm
->irq_routes
->nr
= 0;
/* Append *entry to the cached routing table, growing it as needed, and mark
 * its GSI as in use.  Takes effect after kvm_commit_irq_routes().
 * NOTE(review): reconstructed — growth/copy details inferred; confirm. */
int kvm_add_routing_entry(struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm_context_t kvm = kvm_context;
    struct kvm_irq_routing *z;
    struct kvm_irq_routing_entry *new;
    int n, size;

    if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
        n = kvm->nr_allocated_irq_routes * 2;
        if (n < 64) {
            n = 64;
        }
        size = sizeof(struct kvm_irq_routing);
        size += n * sizeof(*new);
        z = realloc(kvm->irq_routes, size);
        if (!z) {
            return -ENOMEM;
        }
        kvm->nr_allocated_irq_routes = n;
        kvm->irq_routes = z;
    }
    n = kvm->irq_routes->nr++;
    new = &kvm->irq_routes->entries[n];
    memset(new, 0, sizeof(*new));
    new->gsi = entry->gsi;
    new->type = entry->type;
    new->flags = entry->flags;
    new->u = entry->u;

    set_gsi(kvm, entry->gsi);

    return 0;
#else
    return -ENOSYS;
#endif
}
/* Convenience wrapper: cache a GSI -> irqchip pin routing entry. */
int kvm_add_irq_route(int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    memset(&e, 0, sizeof(e));
    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_add_routing_entry(&e);
#else
    return -ENOSYS;
#endif
}
/* Remove the cached routing entry matching *entry (irqchip pin or MSI
 * triple).  Frees the GSI in the bitmap if no other entry uses it.
 * Returns 0 on success, -ESRCH when not found.
 * NOTE(review): reconstructed — swap/bookkeeping lines were missing from
 * the extraction; confirm against the project history. */
int kvm_del_routing_entry(struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm_context_t kvm = kvm_context;
    struct kvm_irq_routing_entry *e, *p;
    int i, gsi, found = 0;

    gsi = entry->gsi;

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        e = &kvm->irq_routes->entries[i];
        if (e->type == entry->type && e->gsi == gsi) {
            switch (e->type) {
            case KVM_IRQ_ROUTING_IRQCHIP:{
                    if (e->u.irqchip.irqchip ==
                        entry->u.irqchip.irqchip
                        && e->u.irqchip.pin == entry->u.irqchip.pin) {
                        /* Swap-delete with the last entry. */
                        p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
                        *e = *p;
                        found = 1;
                    }
                    break;
                }
            case KVM_IRQ_ROUTING_MSI:{
                    if (e->u.msi.address_lo ==
                        entry->u.msi.address_lo
                        && e->u.msi.address_hi ==
                        entry->u.msi.address_hi
                        && e->u.msi.data == entry->u.msi.data) {
                        /* Swap-delete with the last entry. */
                        p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
                        *e = *p;
                        found = 1;
                    }
                    break;
                }
            default:
                break;
            }
            if (found) {
                /* If there are no other users of this GSI
                 * mark it available in the bitmap */
                for (i = 0; i < kvm->irq_routes->nr; i++) {
                    e = &kvm->irq_routes->entries[i];
                    if (e->gsi == gsi) {
                        break;
                    }
                }
                if (i == kvm->irq_routes->nr) {
                    clear_gsi(kvm, gsi);
                }
                return 0;
            }
        }
    }
    return -ESRCH;
#else
    return -ENOSYS;
#endif
}
/* Replace the payload of the cached entry matching *entry with *newentry.
 * GSI and type must agree between the two.  Returns 0 on success,
 * -EINVAL on mismatched keys, -ESRCH when no matching entry exists. */
int kvm_update_routing_entry(struct kvm_irq_routing_entry *entry,
                             struct kvm_irq_routing_entry *newentry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm_context_t kvm = kvm_context;
    struct kvm_irq_routing_entry *e;
    int i;

    if (entry->gsi != newentry->gsi || entry->type != newentry->type) {
        return -EINVAL;
    }

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        e = &kvm->irq_routes->entries[i];
        if (e->type != entry->type || e->gsi != entry->gsi) {
            continue;
        }
        switch (e->type) {
        case KVM_IRQ_ROUTING_IRQCHIP:
            if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
                e->u.irqchip.pin == entry->u.irqchip.pin) {
                memcpy(&e->u.irqchip, &newentry->u.irqchip,
                       sizeof e->u.irqchip);
                return 0;
            }
            break;
        case KVM_IRQ_ROUTING_MSI:
            if (e->u.msi.address_lo == entry->u.msi.address_lo &&
                e->u.msi.address_hi == entry->u.msi.address_hi &&
                e->u.msi.data == entry->u.msi.data) {
                memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
                return 0;
            }
            break;
        default:
            break;
        }
    }
    return -ESRCH;
#else
    return -ENOSYS;
#endif
}
/* Convenience wrapper: delete a cached GSI -> irqchip pin routing entry. */
int kvm_del_irq_route(int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    memset(&e, 0, sizeof(e));
    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_del_routing_entry(&e);
#else
    return -ENOSYS;
#endif
}
765 int kvm_commit_irq_routes(void)
767 #ifdef KVM_CAP_IRQ_ROUTING
768 kvm_context_t kvm
= kvm_context
;
770 kvm
->irq_routes
->flags
= 0;
771 return kvm_vm_ioctl(kvm_state
, KVM_SET_GSI_ROUTING
, kvm
->irq_routes
);
777 int kvm_get_irq_route_gsi(void)
779 kvm_context_t kvm
= kvm_context
;
781 uint32_t *buf
= kvm
->used_gsi_bitmap
;
783 /* Return the lowest unused GSI in the bitmap */
784 for (i
= 0; i
< kvm
->max_gsi
/ 32; i
++) {
790 return bit
- 1 + i
* 32;
796 static void kvm_msi_routing_entry(struct kvm_irq_routing_entry
*e
,
801 e
->type
= KVM_IRQ_ROUTING_MSI
;
803 e
->u
.msi
.address_lo
= msg
->addr_lo
;
804 e
->u
.msi
.address_hi
= msg
->addr_hi
;
805 e
->u
.msi
.data
= msg
->data
;
808 int kvm_msi_message_add(KVMMsiMessage
*msg
)
810 struct kvm_irq_routing_entry e
;
813 ret
= kvm_get_irq_route_gsi();
819 kvm_msi_routing_entry(&e
, msg
);
820 return kvm_add_routing_entry(&e
);
823 int kvm_msi_message_del(KVMMsiMessage
*msg
)
825 struct kvm_irq_routing_entry e
;
827 kvm_msi_routing_entry(&e
, msg
);
828 return kvm_del_routing_entry(&e
);
831 int kvm_msi_message_update(KVMMsiMessage
*old
, KVMMsiMessage
*new)
833 struct kvm_irq_routing_entry e1
, e2
;
837 if (memcmp(old
, new, sizeof(KVMMsiMessage
)) == 0) {
841 kvm_msi_routing_entry(&e1
, old
);
842 kvm_msi_routing_entry(&e2
, new);
844 ret
= kvm_update_routing_entry(&e1
, &e2
);
853 #ifdef KVM_CAP_DEVICE_MSIX
854 int kvm_assign_set_msix_nr(kvm_context_t kvm
,
855 struct kvm_assigned_msix_nr
*msix_nr
)
857 return kvm_vm_ioctl(kvm_state
, KVM_ASSIGN_SET_MSIX_NR
, msix_nr
);
860 int kvm_assign_set_msix_entry(kvm_context_t kvm
,
861 struct kvm_assigned_msix_entry
*entry
)
863 return kvm_vm_ioctl(kvm_state
, KVM_ASSIGN_SET_MSIX_ENTRY
, entry
);
867 unsigned long kvm_get_thread_id(void)
869 return syscall(SYS_gettid
);
872 static void kvm_cond_wait(pthread_cond_t
*cond
)
874 CPUState
*env
= cpu_single_env
;
876 pthread_cond_wait(cond
, &qemu_mutex
);
877 cpu_single_env
= env
;
880 static void sig_ipi_handler(int n
)
884 static void sigbus_reraise(void)
887 struct sigaction action
;
889 memset(&action
, 0, sizeof(action
));
890 action
.sa_handler
= SIG_DFL
;
891 if (!sigaction(SIGBUS
, &action
, NULL
)) {
894 sigaddset(&set
, SIGBUS
);
895 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
897 perror("Failed to re-raise SIGBUS!\n");
901 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
904 if (kvm_on_sigbus(siginfo
->ssi_code
, (void *)(intptr_t)siginfo
->ssi_addr
))
908 void on_vcpu(CPUState
*env
, void (*func
)(void *data
), void *data
)
910 struct qemu_work_item wi
;
912 if (env
== current_env
) {
919 if (!env
->kvm_cpu_state
.queued_work_first
) {
920 env
->kvm_cpu_state
.queued_work_first
= &wi
;
922 env
->kvm_cpu_state
.queued_work_last
->next
= &wi
;
924 env
->kvm_cpu_state
.queued_work_last
= &wi
;
928 pthread_kill(env
->thread
->thread
, SIG_IPI
);
930 kvm_cond_wait(&qemu_work_cond
);
934 static void inject_interrupt(void *data
)
936 cpu_interrupt(current_env
, (long) data
);
939 void kvm_inject_interrupt(CPUState
*env
, int mask
)
941 on_vcpu(env
, inject_interrupt
, (void *) (long) mask
);
944 void kvm_update_interrupt_request(CPUState
*env
)
949 if (!current_env
|| !current_env
->created
) {
953 * Testing for created here is really redundant
955 if (current_env
&& current_env
->created
&&
956 env
!= current_env
&& !env
->kvm_cpu_state
.signalled
) {
961 env
->kvm_cpu_state
.signalled
= 1;
963 pthread_kill(env
->thread
->thread
, SIG_IPI
);
969 int kvm_cpu_exec(CPUState
*env
)
975 printf("kvm_run returned %d\n", r
);
976 cpu_dump_state(env
, stderr
, fprintf
, CPU_DUMP_CODE
);
977 vm_stop(VMSTOP_PANIC
);
983 int kvm_cpu_is_stopped(CPUState
*env
)
985 return !vm_running
|| env
->stopped
;
988 static void flush_queued_work(CPUState
*env
)
990 struct qemu_work_item
*wi
;
992 if (!env
->kvm_cpu_state
.queued_work_first
) {
996 while ((wi
= env
->kvm_cpu_state
.queued_work_first
)) {
997 env
->kvm_cpu_state
.queued_work_first
= wi
->next
;
1001 env
->kvm_cpu_state
.queued_work_last
= NULL
;
1002 pthread_cond_broadcast(&qemu_work_cond
);
1005 static void kvm_main_loop_wait(CPUState
*env
, int timeout
)
1013 ts
.tv_sec
= timeout
/ 1000;
1014 ts
.tv_nsec
= (timeout
% 1000) * 1000000;
1015 sigemptyset(&waitset
);
1016 sigaddset(&waitset
, SIG_IPI
);
1017 sigaddset(&waitset
, SIGBUS
);
1020 pthread_mutex_unlock(&qemu_mutex
);
1022 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
1025 pthread_mutex_lock(&qemu_mutex
);
1027 if (r
== -1 && !(e
== EAGAIN
|| e
== EINTR
)) {
1028 printf("sigtimedwait: %s\n", strerror(e
));
1034 if (kvm_on_sigbus_vcpu(env
, siginfo
.si_code
, siginfo
.si_addr
))
1041 r
= sigpending(&chkset
);
1043 printf("sigpending: %s\n", strerror(e
));
1046 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
1048 cpu_single_env
= env
;
1049 flush_queued_work(env
);
1054 pthread_cond_signal(&qemu_pause_cond
);
1057 env
->kvm_cpu_state
.signalled
= 0;
1060 static int all_threads_paused(void)
1062 CPUState
*penv
= first_cpu
;
1068 penv
= (CPUState
*) penv
->next_cpu
;
1074 static void pause_all_threads(void)
1076 CPUState
*penv
= first_cpu
;
1079 if (penv
!= cpu_single_env
) {
1081 pthread_kill(penv
->thread
->thread
, SIG_IPI
);
1087 penv
= (CPUState
*) penv
->next_cpu
;
1090 while (!all_threads_paused()) {
1091 kvm_cond_wait(&qemu_pause_cond
);
1095 static void resume_all_threads(void)
1097 CPUState
*penv
= first_cpu
;
1099 assert(!cpu_single_env
);
1104 pthread_kill(penv
->thread
->thread
, SIG_IPI
);
1105 penv
= (CPUState
*) penv
->next_cpu
;
1109 static void kvm_vm_state_change_handler(void *context
, int running
, int reason
)
1112 resume_all_threads();
1114 pause_all_threads();
1118 static void setup_kernel_sigmask(CPUState
*env
)
1123 sigaddset(&set
, SIGUSR2
);
1124 sigaddset(&set
, SIGIO
);
1125 sigaddset(&set
, SIGALRM
);
1126 sigprocmask(SIG_BLOCK
, &set
, NULL
);
1128 sigprocmask(SIG_BLOCK
, NULL
, &set
);
1129 sigdelset(&set
, SIG_IPI
);
1130 sigdelset(&set
, SIGBUS
);
1132 kvm_set_signal_mask(env
, &set
);
1135 static void qemu_kvm_system_reset(void)
1137 pause_all_threads();
1139 cpu_synchronize_all_states();
1140 qemu_system_reset();
1142 resume_all_threads();
1145 static void process_irqchip_events(CPUState
*env
)
1147 kvm_arch_process_irqchip_events(env
);
1148 if (kvm_arch_has_work(env
))
1152 static int kvm_main_loop_cpu(CPUState
*env
)
1155 int run_cpu
= !kvm_cpu_is_stopped(env
);
1157 kvm_arch_process_async_events(env
);
1158 if (!kvm_irqchip_in_kernel()) {
1159 process_irqchip_events(env
);
1160 run_cpu
= !env
->halted
;
1165 kvm_main_loop_wait(env
, 0);
1167 kvm_main_loop_wait(env
, 1000);
1170 pthread_mutex_unlock(&qemu_mutex
);
1174 static void *ap_main_loop(void *_env
)
1176 CPUState
*env
= _env
;
1177 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1178 struct ioperm_data
*data
= NULL
;
1182 env
->thread_id
= kvm_get_thread_id();
1184 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1185 /* do ioperm for io ports of assigned devices */
1186 QLIST_FOREACH(data
, &ioperm_head
, entries
)
1187 on_vcpu(env
, kvm_arch_do_ioperm
, data
);
1190 pthread_mutex_lock(&qemu_mutex
);
1191 cpu_single_env
= env
;
1193 if (kvm_create_vcpu(env
) < 0) {
1196 setup_kernel_sigmask(env
);
1198 /* signal VCPU creation */
1199 current_env
->created
= 1;
1200 pthread_cond_signal(&qemu_vcpu_cond
);
1202 /* and wait for machine initialization */
1203 while (!qemu_system_ready
) {
1204 kvm_cond_wait(&qemu_system_cond
);
1207 /* re-initialize cpu_single_env after re-acquiring qemu_mutex */
1208 cpu_single_env
= env
;
1210 kvm_main_loop_cpu(env
);
1214 int kvm_init_vcpu(CPUState
*env
)
1216 env
->thread
= qemu_mallocz(sizeof(QemuThread
));
1217 qemu_thread_create(env
->thread
, ap_main_loop
, env
);
1219 while (env
->created
== 0) {
1220 kvm_cond_wait(&qemu_vcpu_cond
);
1227 void kvm_hpet_disable_kpit(void)
1229 struct kvm_pit_state2 ps2
;
1231 kvm_get_pit2(kvm_context
, &ps2
);
1232 ps2
.flags
|= KVM_PIT_FLAGS_HPET_LEGACY
;
1233 kvm_set_pit2(kvm_context
, &ps2
);
1236 void kvm_hpet_enable_kpit(void)
1238 struct kvm_pit_state2 ps2
;
1240 kvm_get_pit2(kvm_context
, &ps2
);
1241 ps2
.flags
&= ~KVM_PIT_FLAGS_HPET_LEGACY
;
1242 kvm_set_pit2(kvm_context
, &ps2
);
1246 int kvm_init_ap(void)
1248 struct sigaction action
;
1250 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler
, NULL
);
1252 signal(SIG_IPI
, sig_ipi_handler
);
1254 memset(&action
, 0, sizeof(action
));
1255 action
.sa_flags
= SA_SIGINFO
;
1256 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
1257 sigaction(SIGBUS
, &action
, NULL
);
1258 prctl(PR_MCE_KILL
, 1, 1, 0, 0);
1262 /* If we have signalfd, we mask out the signals we want to handle and then
1263 * use signalfd to listen for them. We rely on whatever the current signal
1264 * handler is to dispatch the signals when we receive them.
1267 static void sigfd_handler(void *opaque
)
1269 int fd
= (unsigned long) opaque
;
1270 struct qemu_signalfd_siginfo info
;
1271 struct sigaction action
;
1276 len
= read(fd
, &info
, sizeof(info
));
1277 } while (len
== -1 && errno
== EINTR
);
1279 if (len
== -1 && errno
== EAGAIN
) {
1283 if (len
!= sizeof(info
)) {
1284 printf("read from sigfd returned %zd: %m\n", len
);
1288 sigaction(info
.ssi_signo
, NULL
, &action
);
1289 if ((action
.sa_flags
& SA_SIGINFO
) && action
.sa_sigaction
) {
1290 action
.sa_sigaction(info
.ssi_signo
,
1291 (siginfo_t
*)&info
, NULL
);
1292 } else if (action
.sa_handler
) {
1293 action
.sa_handler(info
.ssi_signo
);
1298 int kvm_main_loop(void)
1303 qemu_system_ready
= 1;
1306 sigaddset(&mask
, SIGIO
);
1307 sigaddset(&mask
, SIGALRM
);
1308 sigaddset(&mask
, SIGBUS
);
1309 sigprocmask(SIG_BLOCK
, &mask
, NULL
);
1311 sigfd
= qemu_signalfd(&mask
);
1313 fprintf(stderr
, "failed to create signalfd\n");
1317 fcntl(sigfd
, F_SETFL
, O_NONBLOCK
);
1319 qemu_set_fd_handler2(sigfd
, NULL
, sigfd_handler
, NULL
,
1320 (void *)(unsigned long) sigfd
);
1322 pthread_cond_broadcast(&qemu_system_cond
);
1324 cpu_single_env
= NULL
;
1328 if (qemu_shutdown_requested()) {
1329 monitor_protocol_event(QEVENT_SHUTDOWN
, NULL
);
1330 if (qemu_no_shutdown()) {
1331 vm_stop(VMSTOP_SHUTDOWN
);
1335 } else if (qemu_powerdown_requested()) {
1336 monitor_protocol_event(QEVENT_POWERDOWN
, NULL
);
1337 qemu_irq_raise(qemu_system_powerdown
);
1338 } else if (qemu_reset_requested()) {
1339 qemu_kvm_system_reset();
1340 } else if (kvm_debug_cpu_requested
) {
1341 gdb_set_stop_cpu(kvm_debug_cpu_requested
);
1342 vm_stop(VMSTOP_DEBUG
);
1343 kvm_debug_cpu_requested
= NULL
;
1348 pause_all_threads();
1349 pthread_mutex_unlock(&qemu_mutex
);
#if !defined(TARGET_I386)
/* Non-x86 targets have no extra routing setup to do. */
int kvm_arch_init_irq_routing(void)
{
    return 0;
}
#endif
1363 static int kvm_create_context(void)
1365 static const char upgrade_note
[] =
1366 "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
1367 "(see http://sourceforge.net/projects/kvm).\n";
1371 kvm_state
->pit_in_kernel
= kvm_pit
;
1373 #ifdef KVM_CAP_IRQ_ROUTING
1374 kvm_context
->irq_routes
= qemu_mallocz(sizeof(*kvm_context
->irq_routes
));
1375 kvm_context
->nr_allocated_irq_routes
= 0;
1378 kvm_state
->vmfd
= kvm_ioctl(kvm_state
, KVM_CREATE_VM
, 0);
1379 if (kvm_state
->vmfd
< 0) {
1380 fprintf(stderr
, "kvm_create_vm: %m\n");
1381 kvm_finalize(kvm_state
);
1385 r
= kvm_arch_init(kvm_state
);
1387 kvm_finalize(kvm_state
);
1391 kvm_create_irqchip(kvm_context
);
1393 /* There was a nasty bug in < kvm-80 that prevents memory slots from being
1394 * destroyed properly. Since we rely on this capability, refuse to work
1395 * with any kernel without this capability. */
1396 if (!kvm_check_extension(kvm_state
, KVM_CAP_DESTROY_MEMORY_REGION_WORKS
)) {
1398 "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
1403 r
= kvm_arch_init_irq_routing();
1408 kvm_state
->coalesced_mmio
= 0;
1409 #ifdef KVM_CAP_COALESCED_MMIO
1410 kvm_state
->coalesced_mmio
=
1411 kvm_check_extension(kvm_state
, KVM_CAP_COALESCED_MMIO
);
1414 kvm_state
->vcpu_events
= 0;
1415 #ifdef KVM_CAP_VCPU_EVENTS
1416 kvm_state
->vcpu_events
= kvm_check_extension(kvm_state
, KVM_CAP_VCPU_EVENTS
);
1419 kvm_state
->debugregs
= 0;
1420 #ifdef KVM_CAP_DEBUGREGS
1421 kvm_state
->debugregs
= kvm_check_extension(kvm_state
, KVM_CAP_DEBUGREGS
);
1424 kvm_state
->xsave
= 0;
1425 #ifdef KVM_CAP_XSAVE
1426 kvm_state
->xsave
= kvm_check_extension(kvm_state
, KVM_CAP_XSAVE
);
1429 kvm_state
->xcrs
= 0;
1431 kvm_state
->xcrs
= kvm_check_extension(kvm_state
, KVM_CAP_XCRS
);
1434 kvm_state
->many_ioeventfds
= kvm_check_many_ioeventfds();
1438 if (!qemu_kvm_has_gsi_routing()) {
1441 /* if kernel can't do irq routing, interrupt source
1442 * override 0->2 can not be set up as required by hpet,
1446 } else if (!qemu_kvm_has_pit_state2()) {
1457 #ifdef KVM_CAP_IRQCHIP
1459 int kvm_set_irq(int irq
, int level
, int *status
)
1461 return kvm_set_irq_level(kvm_context
, irq
, level
, status
);
1466 static void kvm_mutex_unlock(void)
1468 assert(!cpu_single_env
);
1469 pthread_mutex_unlock(&qemu_mutex
);
1472 static void kvm_mutex_lock(void)
1474 pthread_mutex_lock(&qemu_mutex
);
1475 cpu_single_env
= NULL
;
1478 void qemu_mutex_unlock_iothread(void)
1480 if (kvm_enabled()) {
1485 void qemu_mutex_lock_iothread(void)
1487 if (kvm_enabled()) {
1492 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1493 void kvm_add_ioperm_data(struct ioperm_data
*data
)
1495 QLIST_INSERT_HEAD(&ioperm_head
, data
, entries
);
1498 void kvm_remove_ioperm_data(unsigned long start_port
, unsigned long num
)
1500 struct ioperm_data
*data
;
1502 data
= QLIST_FIRST(&ioperm_head
);
1504 struct ioperm_data
*next
= QLIST_NEXT(data
, entries
);
1506 if (data
->start_port
== start_port
&& data
->num
== num
) {
1507 QLIST_REMOVE(data
, entries
);
1515 void kvm_ioperm(CPUState
*env
, void *data
)
1517 if (kvm_enabled() && qemu_system_ready
) {
1518 on_vcpu(env
, kvm_arch_do_ioperm
, data
);
1524 int kvm_set_boot_cpu_id(uint32_t id
)
1526 return kvm_set_boot_vcpu_id(kvm_context
, id
);