4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
9 #include "config-host.h"
15 #include "qemu-common.h"
26 #include <sys/utsname.h>
27 #include <sys/syscall.h>
29 #include <sys/ioctl.h>
31 #include <sys/prctl.h>
37 #define PR_MCE_KILL 33
41 #define BUS_MCEERR_AR 4
44 #define BUS_MCEERR_AO 5
47 #define EXPECTED_KVM_API_VERSION 12
49 #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
50 #error libkvm: userspace and kernel version mismatch
55 int kvm_pit_reinject
= 1;
60 kvm_context_t kvm_context
;
62 pthread_mutex_t qemu_mutex
= PTHREAD_MUTEX_INITIALIZER
;
63 pthread_cond_t qemu_vcpu_cond
= PTHREAD_COND_INITIALIZER
;
64 pthread_cond_t qemu_system_cond
= PTHREAD_COND_INITIALIZER
;
65 pthread_cond_t qemu_pause_cond
= PTHREAD_COND_INITIALIZER
;
66 pthread_cond_t qemu_work_cond
= PTHREAD_COND_INITIALIZER
;
67 __thread CPUState
*current_env
;
69 static int qemu_system_ready
;
71 #define SIG_IPI (SIGRTMIN+4)
74 static int io_thread_sigfd
= -1;
76 static CPUState
*kvm_debug_cpu_requested
;
78 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
79 /* The list of ioperm_data */
80 static QLIST_HEAD(, ioperm_data
) ioperm_head
;
83 #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
85 int kvm_abi
= EXPECTED_KVM_API_VERSION
;
88 #ifdef KVM_CAP_SET_GUEST_DEBUG
89 static int kvm_debug(CPUState
*env
,
90 struct kvm_debug_exit_arch
*arch_info
)
92 int handle
= kvm_arch_debug(arch_info
);
95 kvm_debug_cpu_requested
= env
;
/*
 * Last-resort handler for VM exit reasons no other case claims.
 * Logs the raw hardware exit reason and reports failure so the vcpu
 * run loop stops instead of spinning on an unknown exit.
 *
 * Returns -EINVAL (negative errno convention used by the run loop).
 */
static int handle_unhandled(uint64_t reason)
{
    fprintf(stderr, "kvm: unhandled exit %" PRIx64 "\n", reason);
    /* fix: control fell off the end of a non-void function; the caller
     * assigns the result to r, so returning garbage was UB */
    return -EINVAL;
}
#define VMX_INVALID_GUEST_STATE 0x80000021

/*
 * Diagnose a KVM_EXIT_FAIL_ENTRY exit: print the hardware entry-failure
 * reason and, for the Intel VT "invalid guest state" code, a hint about
 * the likely cause (big real mode without unrestricted-guest support).
 *
 * Returns -EINVAL so the vcpu run loop aborts.
 */
static int handle_failed_vmentry(uint64_t reason)
{
    fprintf(stderr, "kvm: vm entry failed with error 0x%" PRIx64 "\n\n", reason);

    /* Perhaps we will need to check if this machine is intel since exit
     * reason 0x21 has a different interpretation on SVM */
    if (reason == VMX_INVALID_GUEST_STATE) {
        /* fix: "runnning" typo in the user-visible message */
        fprintf(stderr, "If you're running a guest on an Intel machine without\n");
        fprintf(stderr, "unrestricted mode support, the failure can be most likely\n");
        fprintf(stderr, "due to the guest entering an invalid state for Intel VT.\n");
        fprintf(stderr, "For example, the guest maybe running in big real mode\n");
        fprintf(stderr, "which is not supported on less recent Intel processors.\n\n");
    }

    /* fix: non-void function had no visible return; caller stores the result */
    return -EINVAL;
}
127 static inline void set_gsi(kvm_context_t kvm
, unsigned int gsi
)
129 uint32_t *bitmap
= kvm
->used_gsi_bitmap
;
131 if (gsi
< kvm
->max_gsi
)
132 bitmap
[gsi
/ 32] |= 1U << (gsi
% 32);
134 DPRINTF("Invalid GSI %u\n", gsi
);
137 static inline void clear_gsi(kvm_context_t kvm
, unsigned int gsi
)
139 uint32_t *bitmap
= kvm
->used_gsi_bitmap
;
141 if (gsi
< kvm
->max_gsi
)
142 bitmap
[gsi
/ 32] &= ~(1U << (gsi
% 32));
144 DPRINTF("Invalid GSI %u\n", gsi
);
147 static int kvm_create_context(void);
149 int kvm_init(int smp_cpus
)
155 fd
= open("/dev/kvm", O_RDWR
);
157 perror("open /dev/kvm");
160 r
= ioctl(fd
, KVM_GET_API_VERSION
, 0);
163 "kvm kernel version too old: "
164 "KVM_GET_API_VERSION ioctl not supported\n");
167 if (r
< EXPECTED_KVM_API_VERSION
) {
168 fprintf(stderr
, "kvm kernel version too old: "
169 "We expect API version %d or newer, but got "
170 "version %d\n", EXPECTED_KVM_API_VERSION
, r
);
173 if (r
> EXPECTED_KVM_API_VERSION
) {
174 fprintf(stderr
, "kvm userspace version too old\n");
178 kvm_page_size
= getpagesize();
179 kvm_state
= qemu_mallocz(sizeof(*kvm_state
));
180 kvm_context
= &kvm_state
->kvm_context
;
183 kvm_state
->vmfd
= -1;
184 kvm_context
->opaque
= cpu_single_env
;
185 kvm_context
->dirty_pages_log_all
= 0;
186 kvm_context
->no_irqchip_creation
= 0;
187 kvm_context
->no_pit_creation
= 0;
189 #ifdef KVM_CAP_SET_GUEST_DEBUG
190 QTAILQ_INIT(&kvm_state
->kvm_sw_breakpoints
);
193 gsi_count
= kvm_get_gsi_count(kvm_context
);
197 /* Round up so we can search ints using ffs */
198 gsi_bits
= ALIGN(gsi_count
, 32);
199 kvm_context
->used_gsi_bitmap
= qemu_mallocz(gsi_bits
/ 8);
200 kvm_context
->max_gsi
= gsi_bits
;
202 /* Mark any over-allocated bits as already in use */
203 for (i
= gsi_count
; i
< gsi_bits
; i
++) {
204 set_gsi(kvm_context
, i
);
208 kvm_cpu_register_phys_memory_client();
210 pthread_mutex_lock(&qemu_mutex
);
211 return kvm_create_context();
218 static void kvm_finalize(KVMState
*s
)
221 if (kvm->vcpu_fd[0] != -1)
222 close(kvm->vcpu_fd[0]);
223 if (kvm->vm_fd != -1)
230 void kvm_disable_irqchip_creation(kvm_context_t kvm
)
232 kvm
->no_irqchip_creation
= 1;
235 void kvm_disable_pit_creation(kvm_context_t kvm
)
237 kvm
->no_pit_creation
= 1;
240 static void kvm_reset_vcpu(void *opaque
)
242 CPUState
*env
= opaque
;
244 kvm_arch_cpu_reset(env
);
247 static void kvm_create_vcpu(CPUState
*env
, int id
)
251 KVMState
*s
= kvm_state
;
253 r
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_VCPU
, id
);
255 fprintf(stderr
, "kvm_create_vcpu: %m\n");
256 fprintf(stderr
, "Failed to create vCPU. Check the -smp parameter.\n");
261 env
->kvm_state
= kvm_state
;
263 mmap_size
= kvm_ioctl(kvm_state
, KVM_GET_VCPU_MMAP_SIZE
, 0);
265 fprintf(stderr
, "get vcpu mmap size: %m\n");
269 mmap(NULL
, mmap_size
, PROT_READ
| PROT_WRITE
, MAP_SHARED
, env
->kvm_fd
,
271 if (env
->kvm_run
== MAP_FAILED
) {
272 fprintf(stderr
, "mmap vcpu area: %m\n");
276 #ifdef KVM_CAP_COALESCED_MMIO
277 if (s
->coalesced_mmio
&& !s
->coalesced_mmio_ring
)
278 s
->coalesced_mmio_ring
= (void *) env
->kvm_run
+
279 s
->coalesced_mmio
* PAGE_SIZE
;
282 r
= kvm_arch_init_vcpu(env
);
284 qemu_register_reset(kvm_reset_vcpu
, env
);
291 /* We're no good with semi-broken states. */
295 static int kvm_set_boot_vcpu_id(kvm_context_t kvm
, uint32_t id
)
297 #ifdef KVM_CAP_SET_BOOT_CPU_ID
298 int r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_SET_BOOT_CPU_ID
);
300 return kvm_vm_ioctl(kvm_state
, KVM_SET_BOOT_CPU_ID
, id
);
308 int kvm_create_vm(kvm_context_t kvm
)
311 #ifdef KVM_CAP_IRQ_ROUTING
312 kvm
->irq_routes
= qemu_mallocz(sizeof(*kvm
->irq_routes
));
313 kvm
->nr_allocated_irq_routes
= 0;
316 fd
= kvm_ioctl(kvm_state
, KVM_CREATE_VM
, 0);
318 fprintf(stderr
, "kvm_create_vm: %m\n");
321 kvm_state
->vmfd
= fd
;
325 static int kvm_create_default_phys_mem(kvm_context_t kvm
,
326 unsigned long phys_mem_bytes
,
329 #ifdef KVM_CAP_USER_MEMORY
330 int r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_USER_MEMORY
);
334 "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
336 #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
341 void kvm_create_irqchip(kvm_context_t kvm
)
345 kvm
->irqchip_in_kernel
= 0;
346 #ifdef KVM_CAP_IRQCHIP
347 if (!kvm
->no_irqchip_creation
) {
348 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_IRQCHIP
);
349 if (r
> 0) { /* kernel irqchip supported */
350 r
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_IRQCHIP
);
352 kvm
->irqchip_inject_ioctl
= KVM_IRQ_LINE
;
353 #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
354 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
,
355 KVM_CAP_IRQ_INJECT_STATUS
);
357 kvm
->irqchip_inject_ioctl
= KVM_IRQ_LINE_STATUS
;
360 kvm
->irqchip_in_kernel
= 1;
362 fprintf(stderr
, "Create kernel PIC irqchip failed\n");
366 kvm_state
->irqchip_in_kernel
= kvm
->irqchip_in_kernel
;
369 int kvm_create(kvm_context_t kvm
, unsigned long phys_mem_bytes
, void **vm_mem
)
373 r
= kvm_create_vm(kvm
);
377 r
= kvm_arch_create(kvm
, phys_mem_bytes
, vm_mem
);
381 for (i
= 0; i
< ARRAY_SIZE(kvm_state
->slots
); i
++) {
382 kvm_state
->slots
[i
].slot
= i
;
385 r
= kvm_create_default_phys_mem(kvm
, phys_mem_bytes
, vm_mem
);
390 kvm_create_irqchip(kvm
);
395 #ifdef KVM_CAP_IRQCHIP
397 int kvm_set_irq_level(kvm_context_t kvm
, int irq
, int level
, int *status
)
399 struct kvm_irq_level event
;
402 if (!kvm
->irqchip_in_kernel
) {
407 r
= kvm_vm_ioctl(kvm_state
, kvm
->irqchip_inject_ioctl
, &event
);
409 perror("kvm_set_irq_level");
413 #ifdef KVM_CAP_IRQ_INJECT_STATUS
415 (kvm
->irqchip_inject_ioctl
== KVM_IRQ_LINE
) ? 1 : event
.status
;
424 int kvm_get_irqchip(kvm_context_t kvm
, struct kvm_irqchip
*chip
)
428 if (!kvm
->irqchip_in_kernel
) {
431 r
= kvm_vm_ioctl(kvm_state
, KVM_GET_IRQCHIP
, chip
);
433 perror("kvm_get_irqchip\n");
438 int kvm_set_irqchip(kvm_context_t kvm
, struct kvm_irqchip
*chip
)
442 if (!kvm
->irqchip_in_kernel
) {
445 r
= kvm_vm_ioctl(kvm_state
, KVM_SET_IRQCHIP
, chip
);
447 perror("kvm_set_irqchip\n");
454 static int handle_debug(CPUState
*env
)
456 #ifdef KVM_CAP_SET_GUEST_DEBUG
457 struct kvm_run
*run
= env
->kvm_run
;
459 return kvm_debug(env
, &run
->debug
.arch
);
465 int kvm_get_regs(CPUState
*env
, struct kvm_regs
*regs
)
467 return kvm_vcpu_ioctl(env
, KVM_GET_REGS
, regs
);
470 int kvm_set_regs(CPUState
*env
, struct kvm_regs
*regs
)
472 return kvm_vcpu_ioctl(env
, KVM_SET_REGS
, regs
);
475 int kvm_get_fpu(CPUState
*env
, struct kvm_fpu
*fpu
)
477 return kvm_vcpu_ioctl(env
, KVM_GET_FPU
, fpu
);
480 int kvm_set_fpu(CPUState
*env
, struct kvm_fpu
*fpu
)
482 return kvm_vcpu_ioctl(env
, KVM_SET_FPU
, fpu
);
485 int kvm_get_sregs(CPUState
*env
, struct kvm_sregs
*sregs
)
487 return kvm_vcpu_ioctl(env
, KVM_GET_SREGS
, sregs
);
490 int kvm_set_sregs(CPUState
*env
, struct kvm_sregs
*sregs
)
492 return kvm_vcpu_ioctl(env
, KVM_SET_SREGS
, sregs
);
495 #ifdef KVM_CAP_MP_STATE
496 int kvm_get_mpstate(CPUState
*env
, struct kvm_mp_state
*mp_state
)
500 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_MP_STATE
);
502 return kvm_vcpu_ioctl(env
, KVM_GET_MP_STATE
, mp_state
);
507 int kvm_set_mpstate(CPUState
*env
, struct kvm_mp_state
*mp_state
)
511 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_MP_STATE
);
513 return kvm_vcpu_ioctl(env
, KVM_SET_MP_STATE
, mp_state
);
520 int kvm_get_xsave(CPUState
*env
, struct kvm_xsave
*xsave
)
522 return kvm_vcpu_ioctl(env
, KVM_GET_XSAVE
, xsave
);
525 int kvm_set_xsave(CPUState
*env
, struct kvm_xsave
*xsave
)
527 return kvm_vcpu_ioctl(env
, KVM_SET_XSAVE
, xsave
);
532 int kvm_get_xcrs(CPUState
*env
, struct kvm_xcrs
*xcrs
)
534 return kvm_vcpu_ioctl(env
, KVM_GET_XCRS
, xcrs
);
537 int kvm_set_xcrs(CPUState
*env
, struct kvm_xcrs
*xcrs
)
539 return kvm_vcpu_ioctl(env
, KVM_SET_XCRS
, xcrs
);
543 static int handle_mmio(CPUState
*env
)
545 unsigned long addr
= env
->kvm_run
->mmio
.phys_addr
;
546 struct kvm_run
*kvm_run
= env
->kvm_run
;
547 void *data
= kvm_run
->mmio
.data
;
549 /* hack: Red Hat 7.1 generates these weird accesses. */
550 if ((addr
> 0xa0000 - 4 && addr
<= 0xa0000) && kvm_run
->mmio
.len
== 3) {
554 cpu_physical_memory_rw(addr
, data
, kvm_run
->mmio
.len
, kvm_run
->mmio
.is_write
);
558 int handle_io_window(kvm_context_t kvm
)
563 int handle_shutdown(kvm_context_t kvm
, CPUState
*env
)
565 /* stop the current vcpu from going back to guest mode */
568 qemu_system_reset_request();
572 static inline void push_nmi(kvm_context_t kvm
)
574 #ifdef KVM_CAP_USER_NMI
575 kvm_arch_push_nmi(kvm
->opaque
);
576 #endif /* KVM_CAP_USER_NMI */
579 void post_kvm_run(kvm_context_t kvm
, CPUState
*env
)
581 pthread_mutex_lock(&qemu_mutex
);
582 kvm_arch_post_run(env
, env
->kvm_run
);
583 cpu_single_env
= env
;
586 int pre_kvm_run(kvm_context_t kvm
, CPUState
*env
)
588 kvm_arch_pre_run(env
, env
->kvm_run
);
590 pthread_mutex_unlock(&qemu_mutex
);
594 int kvm_is_ready_for_interrupt_injection(CPUState
*env
)
596 return env
->kvm_run
->ready_for_interrupt_injection
;
599 int kvm_run(CPUState
*env
)
602 kvm_context_t kvm
= &env
->kvm_state
->kvm_context
;
603 struct kvm_run
*run
= env
->kvm_run
;
604 int fd
= env
->kvm_fd
;
607 if (env
->kvm_vcpu_dirty
) {
608 kvm_arch_load_regs(env
, KVM_PUT_RUNTIME_STATE
);
609 env
->kvm_vcpu_dirty
= 0;
612 #if !defined(__s390__)
613 if (!kvm
->irqchip_in_kernel
) {
614 run
->request_interrupt_window
= kvm_arch_try_push_interrupts(env
);
618 r
= pre_kvm_run(kvm
, env
);
622 if (env
->exit_request
) {
623 env
->exit_request
= 0;
624 pthread_kill(env
->kvm_cpu_state
.thread
, SIG_IPI
);
626 r
= ioctl(fd
, KVM_RUN
, 0);
628 if (r
== -1 && errno
!= EINTR
&& errno
!= EAGAIN
) {
630 post_kvm_run(kvm
, env
);
631 fprintf(stderr
, "kvm_run: %s\n", strerror(-r
));
635 post_kvm_run(kvm
, env
);
637 kvm_flush_coalesced_mmio_buffer();
639 #if !defined(__s390__)
641 r
= handle_io_window(kvm
);
646 switch (run
->exit_reason
) {
647 case KVM_EXIT_UNKNOWN
:
648 r
= handle_unhandled(run
->hw
.hardware_exit_reason
);
650 case KVM_EXIT_FAIL_ENTRY
:
651 r
= handle_failed_vmentry(run
->fail_entry
.hardware_entry_failure_reason
);
653 case KVM_EXIT_EXCEPTION
:
654 fprintf(stderr
, "exception %d (%x)\n", run
->ex
.exception
,
661 r
= kvm_handle_io(run
->io
.port
,
662 (uint8_t *)run
+ run
->io
.data_offset
,
669 r
= handle_debug(env
);
672 r
= handle_mmio(env
);
675 r
= kvm_arch_halt(env
);
677 case KVM_EXIT_IRQ_WINDOW_OPEN
:
679 case KVM_EXIT_SHUTDOWN
:
680 r
= handle_shutdown(kvm
, env
);
682 #if defined(__s390__)
683 case KVM_EXIT_S390_SIEIC
:
684 r
= kvm_s390_handle_intercept(kvm
, env
, run
);
686 case KVM_EXIT_S390_RESET
:
687 r
= kvm_s390_handle_reset(kvm
, env
, run
);
690 case KVM_EXIT_INTERNAL_ERROR
:
691 kvm_handle_internal_error(env
, run
);
695 if (kvm_arch_run(env
)) {
696 fprintf(stderr
, "unhandled vm exit: 0x%x\n", run
->exit_reason
);
710 int kvm_inject_irq(CPUState
*env
, unsigned irq
)
712 struct kvm_interrupt intr
;
715 return kvm_vcpu_ioctl(env
, KVM_INTERRUPT
, &intr
);
718 int kvm_inject_nmi(CPUState
*env
)
720 #ifdef KVM_CAP_USER_NMI
721 return kvm_vcpu_ioctl(env
, KVM_NMI
);
727 int kvm_init_coalesced_mmio(kvm_context_t kvm
)
730 kvm_state
->coalesced_mmio
= 0;
731 #ifdef KVM_CAP_COALESCED_MMIO
732 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_COALESCED_MMIO
);
734 kvm_state
->coalesced_mmio
= r
;
741 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
742 int kvm_assign_pci_device(kvm_context_t kvm
,
743 struct kvm_assigned_pci_dev
*assigned_dev
)
745 return kvm_vm_ioctl(kvm_state
, KVM_ASSIGN_PCI_DEVICE
, assigned_dev
);
748 static int kvm_old_assign_irq(kvm_context_t kvm
,
749 struct kvm_assigned_irq
*assigned_irq
)
751 return kvm_vm_ioctl(kvm_state
, KVM_ASSIGN_IRQ
, assigned_irq
);
754 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
755 int kvm_assign_irq(kvm_context_t kvm
, struct kvm_assigned_irq
*assigned_irq
)
759 ret
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_ASSIGN_DEV_IRQ
);
761 return kvm_vm_ioctl(kvm_state
, KVM_ASSIGN_DEV_IRQ
, assigned_irq
);
764 return kvm_old_assign_irq(kvm
, assigned_irq
);
767 int kvm_deassign_irq(kvm_context_t kvm
, struct kvm_assigned_irq
*assigned_irq
)
769 return kvm_vm_ioctl(kvm_state
, KVM_DEASSIGN_DEV_IRQ
, assigned_irq
);
772 int kvm_assign_irq(kvm_context_t kvm
, struct kvm_assigned_irq
*assigned_irq
)
774 return kvm_old_assign_irq(kvm
, assigned_irq
);
779 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
780 int kvm_deassign_pci_device(kvm_context_t kvm
,
781 struct kvm_assigned_pci_dev
*assigned_dev
)
783 return kvm_vm_ioctl(kvm_state
, KVM_DEASSIGN_PCI_DEVICE
, assigned_dev
);
787 int kvm_reinject_control(kvm_context_t kvm
, int pit_reinject
)
789 #ifdef KVM_CAP_REINJECT_CONTROL
791 struct kvm_reinject_control control
;
793 control
.pit_reinject
= pit_reinject
;
795 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_REINJECT_CONTROL
);
797 return kvm_vm_ioctl(kvm_state
, KVM_REINJECT_CONTROL
, &control
);
803 int kvm_has_gsi_routing(void)
807 #ifdef KVM_CAP_IRQ_ROUTING
808 r
= kvm_check_extension(kvm_state
, KVM_CAP_IRQ_ROUTING
);
813 int kvm_get_gsi_count(kvm_context_t kvm
)
815 #ifdef KVM_CAP_IRQ_ROUTING
816 return kvm_check_extension(kvm_state
, KVM_CAP_IRQ_ROUTING
);
822 int kvm_clear_gsi_routes(void)
824 #ifdef KVM_CAP_IRQ_ROUTING
825 kvm_context_t kvm
= kvm_context
;
827 kvm
->irq_routes
->nr
= 0;
834 int kvm_add_routing_entry(struct kvm_irq_routing_entry
*entry
)
836 #ifdef KVM_CAP_IRQ_ROUTING
837 kvm_context_t kvm
= kvm_context
;
838 struct kvm_irq_routing
*z
;
839 struct kvm_irq_routing_entry
*new;
842 if (kvm
->irq_routes
->nr
== kvm
->nr_allocated_irq_routes
) {
843 n
= kvm
->nr_allocated_irq_routes
* 2;
847 size
= sizeof(struct kvm_irq_routing
);
848 size
+= n
* sizeof(*new);
849 z
= realloc(kvm
->irq_routes
, size
);
853 kvm
->nr_allocated_irq_routes
= n
;
856 n
= kvm
->irq_routes
->nr
++;
857 new = &kvm
->irq_routes
->entries
[n
];
858 memset(new, 0, sizeof(*new));
859 new->gsi
= entry
->gsi
;
860 new->type
= entry
->type
;
861 new->flags
= entry
->flags
;
864 set_gsi(kvm
, entry
->gsi
);
872 int kvm_add_irq_route(int gsi
, int irqchip
, int pin
)
874 #ifdef KVM_CAP_IRQ_ROUTING
875 struct kvm_irq_routing_entry e
;
878 e
.type
= KVM_IRQ_ROUTING_IRQCHIP
;
880 e
.u
.irqchip
.irqchip
= irqchip
;
881 e
.u
.irqchip
.pin
= pin
;
882 return kvm_add_routing_entry(&e
);
888 int kvm_del_routing_entry(struct kvm_irq_routing_entry
*entry
)
890 #ifdef KVM_CAP_IRQ_ROUTING
891 kvm_context_t kvm
= kvm_context
;
892 struct kvm_irq_routing_entry
*e
, *p
;
893 int i
, gsi
, found
= 0;
897 for (i
= 0; i
< kvm
->irq_routes
->nr
; ++i
) {
898 e
= &kvm
->irq_routes
->entries
[i
];
899 if (e
->type
== entry
->type
&& e
->gsi
== gsi
) {
901 case KVM_IRQ_ROUTING_IRQCHIP
:{
902 if (e
->u
.irqchip
.irqchip
==
903 entry
->u
.irqchip
.irqchip
904 && e
->u
.irqchip
.pin
== entry
->u
.irqchip
.pin
) {
905 p
= &kvm
->irq_routes
->entries
[--kvm
->irq_routes
->nr
];
911 case KVM_IRQ_ROUTING_MSI
:{
912 if (e
->u
.msi
.address_lo
==
913 entry
->u
.msi
.address_lo
914 && e
->u
.msi
.address_hi
==
915 entry
->u
.msi
.address_hi
916 && e
->u
.msi
.data
== entry
->u
.msi
.data
) {
917 p
= &kvm
->irq_routes
->entries
[--kvm
->irq_routes
->nr
];
927 /* If there are no other users of this GSI
928 * mark it available in the bitmap */
929 for (i
= 0; i
< kvm
->irq_routes
->nr
; i
++) {
930 e
= &kvm
->irq_routes
->entries
[i
];
934 if (i
== kvm
->irq_routes
->nr
) {
948 int kvm_update_routing_entry(struct kvm_irq_routing_entry
*entry
,
949 struct kvm_irq_routing_entry
*newentry
)
951 #ifdef KVM_CAP_IRQ_ROUTING
952 kvm_context_t kvm
= kvm_context
;
953 struct kvm_irq_routing_entry
*e
;
956 if (entry
->gsi
!= newentry
->gsi
|| entry
->type
!= newentry
->type
) {
960 for (i
= 0; i
< kvm
->irq_routes
->nr
; ++i
) {
961 e
= &kvm
->irq_routes
->entries
[i
];
962 if (e
->type
!= entry
->type
|| e
->gsi
!= entry
->gsi
) {
966 case KVM_IRQ_ROUTING_IRQCHIP
:
967 if (e
->u
.irqchip
.irqchip
== entry
->u
.irqchip
.irqchip
&&
968 e
->u
.irqchip
.pin
== entry
->u
.irqchip
.pin
) {
969 memcpy(&e
->u
.irqchip
, &newentry
->u
.irqchip
,
970 sizeof e
->u
.irqchip
);
974 case KVM_IRQ_ROUTING_MSI
:
975 if (e
->u
.msi
.address_lo
== entry
->u
.msi
.address_lo
&&
976 e
->u
.msi
.address_hi
== entry
->u
.msi
.address_hi
&&
977 e
->u
.msi
.data
== entry
->u
.msi
.data
) {
978 memcpy(&e
->u
.msi
, &newentry
->u
.msi
, sizeof e
->u
.msi
);
992 int kvm_del_irq_route(int gsi
, int irqchip
, int pin
)
994 #ifdef KVM_CAP_IRQ_ROUTING
995 struct kvm_irq_routing_entry e
;
998 e
.type
= KVM_IRQ_ROUTING_IRQCHIP
;
1000 e
.u
.irqchip
.irqchip
= irqchip
;
1001 e
.u
.irqchip
.pin
= pin
;
1002 return kvm_del_routing_entry(&e
);
1008 int kvm_commit_irq_routes(void)
1010 #ifdef KVM_CAP_IRQ_ROUTING
1011 kvm_context_t kvm
= kvm_context
;
1013 kvm
->irq_routes
->flags
= 0;
1014 return kvm_vm_ioctl(kvm_state
, KVM_SET_GSI_ROUTING
, kvm
->irq_routes
);
1020 int kvm_get_irq_route_gsi(void)
1022 kvm_context_t kvm
= kvm_context
;
1024 uint32_t *buf
= kvm
->used_gsi_bitmap
;
1026 /* Return the lowest unused GSI in the bitmap */
1027 for (i
= 0; i
< kvm
->max_gsi
/ 32; i
++) {
1033 return bit
- 1 + i
* 32;
1039 static void kvm_msix_routing_entry(struct kvm_irq_routing_entry
*e
,
1040 uint32_t gsi
, uint32_t addr_lo
,
1041 uint32_t addr_hi
, uint32_t data
)
1045 e
->type
= KVM_IRQ_ROUTING_MSI
;
1047 e
->u
.msi
.address_lo
= addr_lo
;
1048 e
->u
.msi
.address_hi
= addr_hi
;
1049 e
->u
.msi
.data
= data
;
1052 int kvm_add_msix(uint32_t gsi
, uint32_t addr_lo
,
1053 uint32_t addr_hi
, uint32_t data
)
1055 struct kvm_irq_routing_entry e
;
1057 kvm_msix_routing_entry(&e
, gsi
, addr_lo
, addr_hi
, data
);
1058 return kvm_add_routing_entry(&e
);
1061 int kvm_del_msix(uint32_t gsi
, uint32_t addr_lo
,
1062 uint32_t addr_hi
, uint32_t data
)
1064 struct kvm_irq_routing_entry e
;
1066 kvm_msix_routing_entry(&e
, gsi
, addr_lo
, addr_hi
, data
);
1067 return kvm_del_routing_entry(&e
);
1070 int kvm_update_msix(uint32_t old_gsi
, uint32_t old_addr_lo
,
1071 uint32_t old_addr_hi
, uint32_t old_data
,
1072 uint32_t new_gsi
, uint32_t new_addr_lo
,
1073 uint32_t new_addr_hi
, uint32_t new_data
)
1075 struct kvm_irq_routing_entry e1
, e2
;
1077 kvm_msix_routing_entry(&e1
, old_gsi
, old_addr_lo
, old_addr_hi
, old_data
);
1078 kvm_msix_routing_entry(&e2
, new_gsi
, new_addr_lo
, new_addr_hi
, new_data
);
1079 return kvm_update_routing_entry(&e1
, &e2
);
1083 #ifdef KVM_CAP_DEVICE_MSIX
1084 int kvm_assign_set_msix_nr(kvm_context_t kvm
,
1085 struct kvm_assigned_msix_nr
*msix_nr
)
1087 return kvm_vm_ioctl(kvm_state
, KVM_ASSIGN_SET_MSIX_NR
, msix_nr
);
1090 int kvm_assign_set_msix_entry(kvm_context_t kvm
,
1091 struct kvm_assigned_msix_entry
*entry
)
1093 return kvm_vm_ioctl(kvm_state
, KVM_ASSIGN_SET_MSIX_ENTRY
, entry
);
1097 #if defined(KVM_CAP_IRQFD) && defined(CONFIG_EVENTFD)
1099 #include <sys/eventfd.h>
1101 static int _kvm_irqfd(kvm_context_t kvm
, int fd
, int gsi
, int flags
)
1103 struct kvm_irqfd data
= {
1109 return kvm_vm_ioctl(kvm_state
, KVM_IRQFD
, &data
);
1112 int kvm_irqfd(kvm_context_t kvm
, int gsi
, int flags
)
1117 if (!kvm_check_extension(kvm_state
, KVM_CAP_IRQFD
))
1125 r
= _kvm_irqfd(kvm
, fd
, gsi
, 0);
1134 #else /* KVM_CAP_IRQFD */
1136 int kvm_irqfd(kvm_context_t kvm
, int gsi
, int flags
)
1141 #endif /* KVM_CAP_IRQFD */
/* Return the kernel thread id (TID) of the calling thread.
 * Linux-specific: goes through syscall(2) since older glibc exposes no
 * gettid() wrapper. */
unsigned long kvm_get_thread_id(void)
{
    long tid = syscall(SYS_gettid);

    return (unsigned long) tid;
}
1147 static void qemu_cond_wait(pthread_cond_t
*cond
)
1149 CPUState
*env
= cpu_single_env
;
1151 pthread_cond_wait(cond
, &qemu_mutex
);
1152 cpu_single_env
= env
;
1155 static void sig_ipi_handler(int n
)
/* Unrecoverable hardware memory fault (SIGBUS BUS_MCEERR_AR with no way
 * to forward it to the guest): report and terminate qemu. */
static void hardware_memory_error(void)
{
    fprintf(stderr, "Hardware memory error!\n");
    /* fix: this path must not return — callers treat it as fatal and
     * continuing would touch the poisoned page again */
    exit(1);
}
1165 static void sigbus_reraise(void)
1168 struct sigaction action
;
1170 memset(&action
, 0, sizeof(action
));
1171 action
.sa_handler
= SIG_DFL
;
1172 if (!sigaction(SIGBUS
, &action
, NULL
)) {
1175 sigaddset(&set
, SIGBUS
);
1176 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
1178 perror("Failed to re-raise SIGBUS!\n");
1182 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
1185 #if defined(KVM_CAP_MCE) && defined(TARGET_I386)
1186 if ((first_cpu
->mcg_cap
& MCG_SER_P
) && siginfo
->ssi_addr
1187 && siginfo
->ssi_code
== BUS_MCEERR_AO
) {
1190 ram_addr_t ram_addr
;
1191 unsigned long paddr
;
1194 /* Hope we are lucky for AO MCE */
1195 vaddr
= (void *)(intptr_t)siginfo
->ssi_addr
;
1196 if (qemu_ram_addr_from_host(vaddr
, &ram_addr
) ||
1197 !kvm_physical_memory_addr_from_ram(kvm_state
, ram_addr
, (target_phys_addr_t
*)&paddr
)) {
1198 fprintf(stderr
, "Hardware memory error for memory used by "
1199 "QEMU itself instead of guest system!: %llx\n",
1200 (unsigned long long)siginfo
->ssi_addr
);
1203 status
= MCI_STATUS_VAL
| MCI_STATUS_UC
| MCI_STATUS_EN
1204 | MCI_STATUS_MISCV
| MCI_STATUS_ADDRV
| MCI_STATUS_S
1207 kvm_inject_x86_mce(first_cpu
, 9, status
,
1208 MCG_STATUS_MCIP
| MCG_STATUS_RIPV
, paddr
,
1209 (MCM_ADDR_PHYS
<< 6) | 0xc, 1);
1210 for (cenv
= first_cpu
->next_cpu
; cenv
!= NULL
; cenv
= cenv
->next_cpu
) {
1211 kvm_inject_x86_mce(cenv
, 1, MCI_STATUS_VAL
| MCI_STATUS_UC
,
1212 MCG_STATUS_MCIP
| MCG_STATUS_RIPV
, 0, 0, 1);
1218 if (siginfo
->ssi_code
== BUS_MCEERR_AO
) {
1220 } else if (siginfo
->ssi_code
== BUS_MCEERR_AR
) {
1221 hardware_memory_error();
1228 static void on_vcpu(CPUState
*env
, void (*func
)(void *data
), void *data
)
1230 struct qemu_work_item wi
;
1232 if (env
== current_env
) {
1239 if (!env
->kvm_cpu_state
.queued_work_first
) {
1240 env
->kvm_cpu_state
.queued_work_first
= &wi
;
1242 env
->kvm_cpu_state
.queued_work_last
->next
= &wi
;
1244 env
->kvm_cpu_state
.queued_work_last
= &wi
;
1248 pthread_kill(env
->kvm_cpu_state
.thread
, SIG_IPI
);
1250 qemu_cond_wait(&qemu_work_cond
);
1254 static void do_kvm_cpu_synchronize_state(void *_env
)
1256 CPUState
*env
= _env
;
1258 if (!env
->kvm_vcpu_dirty
) {
1259 kvm_arch_save_regs(env
);
1260 env
->kvm_vcpu_dirty
= 1;
1264 void kvm_cpu_synchronize_state(CPUState
*env
)
1266 if (!env
->kvm_vcpu_dirty
) {
1267 on_vcpu(env
, do_kvm_cpu_synchronize_state
, env
);
1271 void kvm_cpu_synchronize_post_reset(CPUState
*env
)
1273 kvm_arch_load_regs(env
, KVM_PUT_RESET_STATE
);
1274 env
->kvm_vcpu_dirty
= 0;
1277 void kvm_cpu_synchronize_post_init(CPUState
*env
)
1279 kvm_arch_load_regs(env
, KVM_PUT_FULL_STATE
);
1280 env
->kvm_vcpu_dirty
= 0;
1283 static void inject_interrupt(void *data
)
1285 cpu_interrupt(current_env
, (long) data
);
1288 void kvm_inject_interrupt(CPUState
*env
, int mask
)
1290 on_vcpu(env
, inject_interrupt
, (void *) (long) mask
);
1293 void kvm_update_interrupt_request(CPUState
*env
)
1298 if (!current_env
|| !current_env
->created
) {
1302 * Testing for created here is really redundant
1304 if (current_env
&& current_env
->created
&&
1305 env
!= current_env
&& !env
->kvm_cpu_state
.signalled
) {
1310 env
->kvm_cpu_state
.signalled
= 1;
1311 if (env
->kvm_cpu_state
.thread
) {
1312 pthread_kill(env
->kvm_cpu_state
.thread
, SIG_IPI
);
1318 int kvm_cpu_exec(CPUState
*env
)
1324 printf("kvm_run returned %d\n", r
);
1331 int kvm_cpu_is_stopped(CPUState
*env
)
1333 return !vm_running
|| env
->stopped
;
1336 static void flush_queued_work(CPUState
*env
)
1338 struct qemu_work_item
*wi
;
1340 if (!env
->kvm_cpu_state
.queued_work_first
) {
1344 while ((wi
= env
->kvm_cpu_state
.queued_work_first
)) {
1345 env
->kvm_cpu_state
.queued_work_first
= wi
->next
;
1349 env
->kvm_cpu_state
.queued_work_last
= NULL
;
1350 pthread_cond_broadcast(&qemu_work_cond
);
1353 static int kvm_mce_in_exception(CPUState
*env
)
1355 struct kvm_msr_entry msr_mcg_status
= {
1356 .index
= MSR_MCG_STATUS
,
1360 r
= kvm_get_msrs(env
, &msr_mcg_status
, 1);
1361 if (r
== -1 || r
== 0) {
1364 return !!(msr_mcg_status
.data
& MCG_STATUS_MCIP
);
1367 static void kvm_on_sigbus(CPUState
*env
, siginfo_t
*siginfo
)
1369 #if defined(KVM_CAP_MCE) && defined(TARGET_I386)
1370 struct kvm_x86_mce mce
= {
1374 ram_addr_t ram_addr
;
1375 unsigned long paddr
;
1378 if ((env
->mcg_cap
& MCG_SER_P
) && siginfo
->si_addr
1379 && (siginfo
->si_code
== BUS_MCEERR_AR
1380 || siginfo
->si_code
== BUS_MCEERR_AO
)) {
1381 if (siginfo
->si_code
== BUS_MCEERR_AR
) {
1382 /* Fake an Intel architectural Data Load SRAR UCR */
1383 mce
.status
= MCI_STATUS_VAL
| MCI_STATUS_UC
| MCI_STATUS_EN
1384 | MCI_STATUS_MISCV
| MCI_STATUS_ADDRV
| MCI_STATUS_S
1385 | MCI_STATUS_AR
| 0x134;
1386 mce
.misc
= (MCM_ADDR_PHYS
<< 6) | 0xc;
1387 mce
.mcg_status
= MCG_STATUS_MCIP
| MCG_STATUS_EIPV
;
1390 * If there is an MCE excpetion being processed, ignore
1393 r
= kvm_mce_in_exception(env
);
1395 fprintf(stderr
, "Failed to get MCE status\n");
1399 /* Fake an Intel architectural Memory scrubbing UCR */
1400 mce
.status
= MCI_STATUS_VAL
| MCI_STATUS_UC
| MCI_STATUS_EN
1401 | MCI_STATUS_MISCV
| MCI_STATUS_ADDRV
| MCI_STATUS_S
1403 mce
.misc
= (MCM_ADDR_PHYS
<< 6) | 0xc;
1404 mce
.mcg_status
= MCG_STATUS_MCIP
| MCG_STATUS_RIPV
;
1406 vaddr
= (void *)siginfo
->si_addr
;
1407 if (qemu_ram_addr_from_host(vaddr
, &ram_addr
) ||
1408 !kvm_physical_memory_addr_from_ram(kvm_state
, ram_addr
, (target_phys_addr_t
*)&paddr
)) {
1409 fprintf(stderr
, "Hardware memory error for memory used by "
1410 "QEMU itself instead of guest system!\n");
1411 /* Hope we are lucky for AO MCE */
1412 if (siginfo
->si_code
== BUS_MCEERR_AO
) {
1415 hardware_memory_error();
1419 // r = kvm_set_mce(env, &mce);
1422 fprintf(stderr
, "kvm_set_mce: %s\n", strerror(errno
));
1428 if (siginfo
->si_code
== BUS_MCEERR_AO
) {
1430 } else if (siginfo
->si_code
== BUS_MCEERR_AR
) {
1431 hardware_memory_error();
1438 static void kvm_main_loop_wait(CPUState
*env
, int timeout
)
1446 ts
.tv_sec
= timeout
/ 1000;
1447 ts
.tv_nsec
= (timeout
% 1000) * 1000000;
1448 sigemptyset(&waitset
);
1449 sigaddset(&waitset
, SIG_IPI
);
1450 sigaddset(&waitset
, SIGBUS
);
1453 pthread_mutex_unlock(&qemu_mutex
);
1455 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
1458 pthread_mutex_lock(&qemu_mutex
);
1460 if (r
== -1 && !(e
== EAGAIN
|| e
== EINTR
)) {
1461 printf("sigtimedwait: %s\n", strerror(e
));
1467 kvm_on_sigbus(env
, &siginfo
);
1473 r
= sigpending(&chkset
);
1475 printf("sigpending: %s\n", strerror(e
));
1478 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
1480 cpu_single_env
= env
;
1481 flush_queued_work(env
);
1486 pthread_cond_signal(&qemu_pause_cond
);
1489 env
->kvm_cpu_state
.signalled
= 0;
1492 static int all_threads_paused(void)
1494 CPUState
*penv
= first_cpu
;
1500 penv
= (CPUState
*) penv
->next_cpu
;
1506 static void pause_all_threads(void)
1508 CPUState
*penv
= first_cpu
;
1511 if (penv
!= cpu_single_env
) {
1513 pthread_kill(penv
->kvm_cpu_state
.thread
, SIG_IPI
);
1519 penv
= (CPUState
*) penv
->next_cpu
;
1522 while (!all_threads_paused()) {
1523 qemu_cond_wait(&qemu_pause_cond
);
1527 static void resume_all_threads(void)
1529 CPUState
*penv
= first_cpu
;
1531 assert(!cpu_single_env
);
1536 pthread_kill(penv
->kvm_cpu_state
.thread
, SIG_IPI
);
1537 penv
= (CPUState
*) penv
->next_cpu
;
1541 static void kvm_vm_state_change_handler(void *context
, int running
, int reason
)
1544 resume_all_threads();
1546 pause_all_threads();
1550 static void setup_kernel_sigmask(CPUState
*env
)
1555 sigaddset(&set
, SIGUSR2
);
1556 sigaddset(&set
, SIGIO
);
1557 sigaddset(&set
, SIGALRM
);
1558 sigprocmask(SIG_BLOCK
, &set
, NULL
);
1560 sigprocmask(SIG_BLOCK
, NULL
, &set
);
1561 sigdelset(&set
, SIG_IPI
);
1562 sigdelset(&set
, SIGBUS
);
1564 kvm_set_signal_mask(env
, &set
);
1567 static void qemu_kvm_system_reset(void)
1569 pause_all_threads();
1571 qemu_system_reset();
1573 resume_all_threads();
1576 static void process_irqchip_events(CPUState
*env
)
1578 kvm_arch_process_irqchip_events(env
);
1579 if (kvm_arch_has_work(env
))
1583 static int kvm_main_loop_cpu(CPUState
*env
)
1586 int run_cpu
= !kvm_cpu_is_stopped(env
);
1587 if (run_cpu
&& !kvm_irqchip_in_kernel()) {
1588 process_irqchip_events(env
);
1589 run_cpu
= !env
->halted
;
1593 kvm_main_loop_wait(env
, 0);
1595 kvm_main_loop_wait(env
, 1000);
1598 pthread_mutex_unlock(&qemu_mutex
);
1602 static void *ap_main_loop(void *_env
)
1604 CPUState
*env
= _env
;
1606 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1607 struct ioperm_data
*data
= NULL
;
1611 env
->thread_id
= kvm_get_thread_id();
1612 sigfillset(&signals
);
1613 sigprocmask(SIG_BLOCK
, &signals
, NULL
);
1615 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1616 /* do ioperm for io ports of assigned devices */
1617 QLIST_FOREACH(data
, &ioperm_head
, entries
)
1618 on_vcpu(env
, kvm_arch_do_ioperm
, data
);
1621 pthread_mutex_lock(&qemu_mutex
);
1622 cpu_single_env
= env
;
1624 kvm_create_vcpu(env
, env
->cpu_index
);
1625 setup_kernel_sigmask(env
);
1627 /* signal VCPU creation */
1628 current_env
->created
= 1;
1629 pthread_cond_signal(&qemu_vcpu_cond
);
1631 /* and wait for machine initialization */
1632 while (!qemu_system_ready
) {
1633 qemu_cond_wait(&qemu_system_cond
);
1636 /* re-initialize cpu_single_env after re-acquiring qemu_mutex */
1637 cpu_single_env
= env
;
1639 kvm_main_loop_cpu(env
);
1643 int kvm_init_vcpu(CPUState
*env
)
1645 pthread_create(&env
->kvm_cpu_state
.thread
, NULL
, ap_main_loop
, env
);
1647 while (env
->created
== 0) {
1648 qemu_cond_wait(&qemu_vcpu_cond
);
1654 int kvm_vcpu_inited(CPUState
*env
)
1656 return env
->created
;
1660 void kvm_hpet_disable_kpit(void)
1662 struct kvm_pit_state2 ps2
;
1664 kvm_get_pit2(kvm_context
, &ps2
);
1665 ps2
.flags
|= KVM_PIT_FLAGS_HPET_LEGACY
;
1666 kvm_set_pit2(kvm_context
, &ps2
);
1669 void kvm_hpet_enable_kpit(void)
1671 struct kvm_pit_state2 ps2
;
1673 kvm_get_pit2(kvm_context
, &ps2
);
1674 ps2
.flags
&= ~KVM_PIT_FLAGS_HPET_LEGACY
;
1675 kvm_set_pit2(kvm_context
, &ps2
);
1679 int kvm_init_ap(void)
1681 struct sigaction action
;
1683 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler
, NULL
);
1685 signal(SIG_IPI
, sig_ipi_handler
);
1687 memset(&action
, 0, sizeof(action
));
1688 action
.sa_flags
= SA_SIGINFO
;
1689 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
1690 sigaction(SIGBUS
, &action
, NULL
);
1691 prctl(PR_MCE_KILL
, 1, 1, 0, 0);
/* If we have signalfd, we mask out the signals we want to handle and then
 * use signalfd to listen for them.  We rely on whatever the current signal
 * handler is to dispatch the signals when we receive them.
 */
1700 static void sigfd_handler(void *opaque
)
1702 int fd
= (unsigned long) opaque
;
1703 struct qemu_signalfd_siginfo info
;
1704 struct sigaction action
;
1709 len
= read(fd
, &info
, sizeof(info
));
1710 } while (len
== -1 && errno
== EINTR
);
1712 if (len
== -1 && errno
== EAGAIN
) {
1716 if (len
!= sizeof(info
)) {
1717 printf("read from sigfd returned %zd: %m\n", len
);
1721 sigaction(info
.ssi_signo
, NULL
, &action
);
1722 if ((action
.sa_flags
& SA_SIGINFO
) && action
.sa_sigaction
) {
1723 action
.sa_sigaction(info
.ssi_signo
,
1724 (siginfo_t
*)&info
, NULL
);
1725 } else if (action
.sa_handler
) {
1726 action
.sa_handler(info
.ssi_signo
);
1731 int kvm_main_loop(void)
1736 io_thread
= pthread_self();
1737 qemu_system_ready
= 1;
1740 sigaddset(&mask
, SIGIO
);
1741 sigaddset(&mask
, SIGALRM
);
1742 sigaddset(&mask
, SIGBUS
);
1743 sigprocmask(SIG_BLOCK
, &mask
, NULL
);
1745 sigfd
= qemu_signalfd(&mask
);
1747 fprintf(stderr
, "failed to create signalfd\n");
1751 fcntl(sigfd
, F_SETFL
, O_NONBLOCK
);
1753 qemu_set_fd_handler2(sigfd
, NULL
, sigfd_handler
, NULL
,
1754 (void *)(unsigned long) sigfd
);
1756 pthread_cond_broadcast(&qemu_system_cond
);
1758 io_thread_sigfd
= sigfd
;
1759 cpu_single_env
= NULL
;
1763 if (qemu_shutdown_requested()) {
1764 monitor_protocol_event(QEVENT_SHUTDOWN
, NULL
);
1765 if (qemu_no_shutdown()) {
1770 } else if (qemu_powerdown_requested()) {
1771 monitor_protocol_event(QEVENT_POWERDOWN
, NULL
);
1772 qemu_irq_raise(qemu_system_powerdown
);
1773 } else if (qemu_reset_requested()) {
1774 qemu_kvm_system_reset();
1775 } else if (kvm_debug_cpu_requested
) {
1776 gdb_set_stop_cpu(kvm_debug_cpu_requested
);
1777 vm_stop(EXCP_DEBUG
);
1778 kvm_debug_cpu_requested
= NULL
;
1782 pause_all_threads();
1783 pthread_mutex_unlock(&qemu_mutex
);
#if !defined(TARGET_I386)
/* Non-x86 targets have no IRQ routing table to build; trivially succeed. */
int kvm_arch_init_irq_routing(void)
{
    return 0;
}
#endif
1797 static int kvm_create_context(void)
1799 static const char upgrade_note
[] =
1800 "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
1801 "(see http://sourceforge.net/projects/kvm).\n";
1806 kvm_disable_irqchip_creation(kvm_context
);
1809 kvm_disable_pit_creation(kvm_context
);
1811 if (kvm_create(kvm_context
, 0, NULL
) < 0) {
1812 kvm_finalize(kvm_state
);
1815 r
= kvm_arch_qemu_create_context();
1817 kvm_finalize(kvm_state
);
1820 if (kvm_pit
&& !kvm_pit_reinject
) {
1821 if (kvm_reinject_control(kvm_context
, 0)) {
1822 fprintf(stderr
, "failure to disable in-kernel PIT reinjection\n");
1827 /* There was a nasty bug in < kvm-80 that prevents memory slots from being
1828 * destroyed properly. Since we rely on this capability, refuse to work
1829 * with any kernel without this capability. */
1830 if (!kvm_check_extension(kvm_state
, KVM_CAP_DESTROY_MEMORY_REGION_WORKS
)) {
1832 "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
1837 r
= kvm_arch_init_irq_routing();
1842 kvm_state
->vcpu_events
= 0;
1843 #ifdef KVM_CAP_VCPU_EVENTS
1844 kvm_state
->vcpu_events
= kvm_check_extension(kvm_state
, KVM_CAP_VCPU_EVENTS
);
1847 kvm_state
->debugregs
= 0;
1848 #ifdef KVM_CAP_DEBUGREGS
1849 kvm_state
->debugregs
= kvm_check_extension(kvm_state
, KVM_CAP_DEBUGREGS
);
1854 if (!qemu_kvm_has_gsi_routing()) {
1857 /* if kernel can't do irq routing, interrupt source
1858 * override 0->2 can not be set up as required by hpet,
1862 } else if (!qemu_kvm_has_pit_state2()) {
#ifdef KVM_CAP_IRQCHIP

/*
 * Drive interrupt line @irq to @level on the in-kernel irqchip;
 * the optional @status receives the injection result.
 */
int kvm_set_irq(int irq, int level, int *status)
{
    return kvm_set_irq_level(kvm_context, irq, level, status);
}

#endif
1882 static void kvm_mutex_unlock(void)
1884 assert(!cpu_single_env
);
1885 pthread_mutex_unlock(&qemu_mutex
);
1888 static void kvm_mutex_lock(void)
1890 pthread_mutex_lock(&qemu_mutex
);
1891 cpu_single_env
= NULL
;
/* QEMU iothread-unlock hook; only meaningful when KVM is active. */
void qemu_mutex_unlock_iothread(void)
{
    if (kvm_enabled()) {
        kvm_mutex_unlock();
    }
}
/* QEMU iothread-lock hook; only meaningful when KVM is active. */
void qemu_mutex_lock_iothread(void)
{
    if (kvm_enabled()) {
        kvm_mutex_lock();
    }
}
1908 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1909 void kvm_add_ioperm_data(struct ioperm_data
*data
)
1911 QLIST_INSERT_HEAD(&ioperm_head
, data
, entries
);
1914 void kvm_remove_ioperm_data(unsigned long start_port
, unsigned long num
)
1916 struct ioperm_data
*data
;
1918 data
= QLIST_FIRST(&ioperm_head
);
1920 struct ioperm_data
*next
= QLIST_NEXT(data
, entries
);
1922 if (data
->start_port
== start_port
&& data
->num
== num
) {
1923 QLIST_REMOVE(data
, entries
);
1931 void kvm_ioperm(CPUState
*env
, void *data
)
1933 if (kvm_enabled() && qemu_system_ready
) {
1934 on_vcpu(env
, kvm_arch_do_ioperm
, data
);
1940 int kvm_set_boot_cpu_id(uint32_t id
)
1942 return kvm_set_boot_vcpu_id(kvm_context
, id
);