pci: Remove pci_enable_capability_support()
[qemu-kvm/stefanha.git] / qemu-kvm.c
blob471306b020881ac7593feed69ba23fbcb30b051b
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
20 #include "monitor.h"
22 #include "qemu-kvm.h"
23 #include "libkvm.h"
25 #include <pthread.h>
26 #include <sys/utsname.h>
27 #include <sys/syscall.h>
28 #include <sys/mman.h>
29 #include <sys/ioctl.h>
30 #include "compatfd.h"
31 #include <sys/prctl.h>
33 #define false 0
34 #define true 1
36 #ifndef PR_MCE_KILL
37 #define PR_MCE_KILL 33
38 #endif
40 #ifndef BUS_MCEERR_AR
41 #define BUS_MCEERR_AR 4
42 #endif
43 #ifndef BUS_MCEERR_AO
44 #define BUS_MCEERR_AO 5
45 #endif
47 #define EXPECTED_KVM_API_VERSION 12
49 #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
50 #error libkvm: userspace and kernel version mismatch
51 #endif
53 int kvm_irqchip = 1;
54 int kvm_pit = 1;
55 int kvm_pit_reinject = 1;
56 int kvm_nested = 0;
59 KVMState *kvm_state;
60 kvm_context_t kvm_context;
62 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
63 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
64 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
65 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
66 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
67 __thread CPUState *current_env;
69 static int qemu_system_ready;
71 #define SIG_IPI (SIGRTMIN+4)
73 pthread_t io_thread;
74 static int io_thread_sigfd = -1;
76 static CPUState *kvm_debug_cpu_requested;
78 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
79 /* The list of ioperm_data */
80 static QLIST_HEAD(, ioperm_data) ioperm_head;
81 #endif
83 #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
85 int kvm_abi = EXPECTED_KVM_API_VERSION;
86 int kvm_page_size;
88 #ifdef KVM_CAP_SET_GUEST_DEBUG
89 static int kvm_debug(CPUState *env,
90 struct kvm_debug_exit_arch *arch_info)
92 int handle = kvm_arch_debug(arch_info);
94 if (handle) {
95 kvm_debug_cpu_requested = env;
96 env->stopped = 1;
98 return handle;
100 #endif
/*
 * Report an exit reason that nobody handles.
 *
 * Prints the hardware exit reason (hex) on stderr and always returns
 * -EINVAL.
 */
static int handle_unhandled(uint64_t reason)
{
    const int rc = -EINVAL;

    fprintf(stderr, "kvm: unhandled exit %" PRIx64 "\n", reason);
    return rc;
}
#define VMX_INVALID_GUEST_STATE 0x80000021

/*
 * Report a failed VM entry.
 *
 * Prints the hardware failure reason on stderr and, when the reason equals
 * VMX_INVALID_GUEST_STATE, an additional hint about the likely cause.
 * Always returns -EINVAL.
 */
static int handle_failed_vmentry(uint64_t reason)
{
    fprintf(stderr, "kvm: vm entry failed with error 0x%" PRIx64 "\n\n", reason);

    /* Perhaps we will need to check if this machine is intel since exit reason 0x21
       has a different interpretation on SVM */
    if (reason == VMX_INVALID_GUEST_STATE) {
        fprintf(stderr, "If you're running a guest on an Intel machine without\n");
        fprintf(stderr, "unrestricted mode support, the failure can be most likely\n");
        fprintf(stderr, "due to the guest entering an invalid state for Intel VT.\n");
        fprintf(stderr, "For example, the guest may be running in big real mode\n");
        fprintf(stderr, "which is not supported on less recent Intel processors.\n\n");
    }

    return -EINVAL;
}
127 static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
129 uint32_t *bitmap = kvm->used_gsi_bitmap;
131 if (gsi < kvm->max_gsi)
132 bitmap[gsi / 32] |= 1U << (gsi % 32);
133 else
134 DPRINTF("Invalid GSI %u\n", gsi);
137 static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
139 uint32_t *bitmap = kvm->used_gsi_bitmap;
141 if (gsi < kvm->max_gsi)
142 bitmap[gsi / 32] &= ~(1U << (gsi % 32));
143 else
144 DPRINTF("Invalid GSI %u\n", gsi);
147 static int kvm_create_context(void);
149 int kvm_init(int smp_cpus)
151 int fd;
152 int r, gsi_count;
155 fd = open("/dev/kvm", O_RDWR);
156 if (fd == -1) {
157 perror("open /dev/kvm");
158 return -1;
160 r = ioctl(fd, KVM_GET_API_VERSION, 0);
161 if (r == -1) {
162 fprintf(stderr,
163 "kvm kernel version too old: "
164 "KVM_GET_API_VERSION ioctl not supported\n");
165 goto out_close;
167 if (r < EXPECTED_KVM_API_VERSION) {
168 fprintf(stderr, "kvm kernel version too old: "
169 "We expect API version %d or newer, but got "
170 "version %d\n", EXPECTED_KVM_API_VERSION, r);
171 goto out_close;
173 if (r > EXPECTED_KVM_API_VERSION) {
174 fprintf(stderr, "kvm userspace version too old\n");
175 goto out_close;
177 kvm_abi = r;
178 kvm_page_size = getpagesize();
179 kvm_state = qemu_mallocz(sizeof(*kvm_state));
180 kvm_context = &kvm_state->kvm_context;
182 kvm_state->fd = fd;
183 kvm_state->vmfd = -1;
184 kvm_context->opaque = cpu_single_env;
185 kvm_context->dirty_pages_log_all = 0;
186 kvm_context->no_irqchip_creation = 0;
187 kvm_context->no_pit_creation = 0;
189 #ifdef KVM_CAP_SET_GUEST_DEBUG
190 QTAILQ_INIT(&kvm_state->kvm_sw_breakpoints);
191 #endif
193 gsi_count = kvm_get_gsi_count(kvm_context);
194 if (gsi_count > 0) {
195 int gsi_bits, i;
197 /* Round up so we can search ints using ffs */
198 gsi_bits = ALIGN(gsi_count, 32);
199 kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
200 kvm_context->max_gsi = gsi_bits;
202 /* Mark any over-allocated bits as already in use */
203 for (i = gsi_count; i < gsi_bits; i++) {
204 set_gsi(kvm_context, i);
208 kvm_cpu_register_phys_memory_client();
210 pthread_mutex_lock(&qemu_mutex);
211 return kvm_create_context();
213 out_close:
214 close(fd);
215 return -1;
218 static void kvm_finalize(KVMState *s)
220 /* FIXME
221 if (kvm->vcpu_fd[0] != -1)
222 close(kvm->vcpu_fd[0]);
223 if (kvm->vm_fd != -1)
224 close(kvm->vm_fd);
226 close(s->fd);
227 free(s);
230 void kvm_disable_irqchip_creation(kvm_context_t kvm)
232 kvm->no_irqchip_creation = 1;
235 void kvm_disable_pit_creation(kvm_context_t kvm)
237 kvm->no_pit_creation = 1;
240 static void kvm_reset_vcpu(void *opaque)
242 CPUState *env = opaque;
244 kvm_arch_cpu_reset(env);
247 static void kvm_create_vcpu(CPUState *env, int id)
249 long mmap_size;
250 int r;
251 KVMState *s = kvm_state;
253 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
254 if (r < 0) {
255 fprintf(stderr, "kvm_create_vcpu: %m\n");
256 fprintf(stderr, "Failed to create vCPU. Check the -smp parameter.\n");
257 goto err;
260 env->kvm_fd = r;
261 env->kvm_state = kvm_state;
263 mmap_size = kvm_ioctl(kvm_state, KVM_GET_VCPU_MMAP_SIZE, 0);
264 if (mmap_size < 0) {
265 fprintf(stderr, "get vcpu mmap size: %m\n");
266 goto err_fd;
268 env->kvm_run =
269 mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, env->kvm_fd,
271 if (env->kvm_run == MAP_FAILED) {
272 fprintf(stderr, "mmap vcpu area: %m\n");
273 goto err_fd;
276 #ifdef KVM_CAP_COALESCED_MMIO
277 if (s->coalesced_mmio && !s->coalesced_mmio_ring)
278 s->coalesced_mmio_ring = (void *) env->kvm_run +
279 s->coalesced_mmio * PAGE_SIZE;
280 #endif
282 r = kvm_arch_init_vcpu(env);
283 if (r == 0) {
284 qemu_register_reset(kvm_reset_vcpu, env);
287 return;
288 err_fd:
289 close(env->kvm_fd);
290 err:
291 /* We're no good with semi-broken states. */
292 abort();
295 static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
297 #ifdef KVM_CAP_SET_BOOT_CPU_ID
298 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
299 if (r > 0) {
300 return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id);
302 return -ENOSYS;
303 #else
304 return -ENOSYS;
305 #endif
308 int kvm_create_vm(kvm_context_t kvm)
310 int fd;
311 #ifdef KVM_CAP_IRQ_ROUTING
312 kvm->irq_routes = qemu_mallocz(sizeof(*kvm->irq_routes));
313 kvm->nr_allocated_irq_routes = 0;
314 #endif
316 fd = kvm_ioctl(kvm_state, KVM_CREATE_VM, 0);
317 if (fd < 0) {
318 fprintf(stderr, "kvm_create_vm: %m\n");
319 return -1;
321 kvm_state->vmfd = fd;
322 return 0;
325 static int kvm_create_default_phys_mem(kvm_context_t kvm,
326 unsigned long phys_mem_bytes,
327 void **vm_mem)
329 #ifdef KVM_CAP_USER_MEMORY
330 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
331 if (r > 0)
332 return 0;
333 fprintf(stderr,
334 "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
335 #else
336 #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
337 #endif
338 return -1;
341 void kvm_create_irqchip(kvm_context_t kvm)
343 int r;
345 kvm->irqchip_in_kernel = 0;
346 #ifdef KVM_CAP_IRQCHIP
347 if (!kvm->no_irqchip_creation) {
348 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
349 if (r > 0) { /* kernel irqchip supported */
350 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_IRQCHIP);
351 if (r >= 0) {
352 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
353 #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
354 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
355 KVM_CAP_IRQ_INJECT_STATUS);
356 if (r > 0) {
357 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
359 #endif
360 kvm->irqchip_in_kernel = 1;
361 } else
362 fprintf(stderr, "Create kernel PIC irqchip failed\n");
365 #endif
366 kvm_state->irqchip_in_kernel = kvm->irqchip_in_kernel;
369 int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
371 int r, i;
373 r = kvm_create_vm(kvm);
374 if (r < 0) {
375 return r;
377 r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
378 if (r < 0) {
379 return r;
381 for (i = 0; i < ARRAY_SIZE(kvm_state->slots); i++) {
382 kvm_state->slots[i].slot = i;
385 r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
386 if (r < 0) {
387 return r;
390 kvm_create_irqchip(kvm);
392 return 0;
395 #ifdef KVM_CAP_IRQCHIP
397 int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
399 struct kvm_irq_level event;
400 int r;
402 if (!kvm->irqchip_in_kernel) {
403 return 0;
405 event.level = level;
406 event.irq = irq;
407 r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event);
408 if (r < 0) {
409 perror("kvm_set_irq_level");
412 if (status) {
413 #ifdef KVM_CAP_IRQ_INJECT_STATUS
414 *status =
415 (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
416 #else
417 *status = 1;
418 #endif
421 return 1;
424 int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
426 int r;
428 if (!kvm->irqchip_in_kernel) {
429 return 0;
431 r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
432 if (r < 0) {
433 perror("kvm_get_irqchip\n");
435 return r;
438 int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
440 int r;
442 if (!kvm->irqchip_in_kernel) {
443 return 0;
445 r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
446 if (r < 0) {
447 perror("kvm_set_irqchip\n");
449 return r;
452 #endif
454 static int handle_debug(CPUState *env)
456 #ifdef KVM_CAP_SET_GUEST_DEBUG
457 struct kvm_run *run = env->kvm_run;
459 return kvm_debug(env, &run->debug.arch);
460 #else
461 return 0;
462 #endif
465 int kvm_get_regs(CPUState *env, struct kvm_regs *regs)
467 return kvm_vcpu_ioctl(env, KVM_GET_REGS, regs);
470 int kvm_set_regs(CPUState *env, struct kvm_regs *regs)
472 return kvm_vcpu_ioctl(env, KVM_SET_REGS, regs);
475 #ifdef KVM_CAP_MP_STATE
476 int kvm_get_mpstate(CPUState *env, struct kvm_mp_state *mp_state)
478 int r;
480 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
481 if (r > 0) {
482 return kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, mp_state);
484 return -ENOSYS;
487 int kvm_set_mpstate(CPUState *env, struct kvm_mp_state *mp_state)
489 int r;
491 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
492 if (r > 0) {
493 return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, mp_state);
495 return -ENOSYS;
497 #endif
499 static int handle_mmio(CPUState *env)
501 unsigned long addr = env->kvm_run->mmio.phys_addr;
502 struct kvm_run *kvm_run = env->kvm_run;
503 void *data = kvm_run->mmio.data;
505 /* hack: Red Hat 7.1 generates these weird accesses. */
506 if ((addr > 0xa0000 - 4 && addr <= 0xa0000) && kvm_run->mmio.len == 3) {
507 return 0;
510 cpu_physical_memory_rw(addr, data, kvm_run->mmio.len, kvm_run->mmio.is_write);
511 return 0;
514 int handle_io_window(kvm_context_t kvm)
516 return 1;
519 int handle_shutdown(kvm_context_t kvm, CPUState *env)
521 /* stop the current vcpu from going back to guest mode */
522 env->stopped = 1;
524 qemu_system_reset_request();
525 return 1;
528 static inline void push_nmi(kvm_context_t kvm)
530 #ifdef KVM_CAP_USER_NMI
531 kvm_arch_push_nmi(kvm->opaque);
532 #endif /* KVM_CAP_USER_NMI */
535 void post_kvm_run(kvm_context_t kvm, CPUState *env)
537 pthread_mutex_lock(&qemu_mutex);
538 kvm_arch_post_run(env, env->kvm_run);
539 cpu_single_env = env;
542 int pre_kvm_run(kvm_context_t kvm, CPUState *env)
544 kvm_arch_pre_run(env, env->kvm_run);
546 pthread_mutex_unlock(&qemu_mutex);
547 return 0;
550 int kvm_is_ready_for_interrupt_injection(CPUState *env)
552 return env->kvm_run->ready_for_interrupt_injection;
555 int kvm_run(CPUState *env)
557 int r;
558 kvm_context_t kvm = &env->kvm_state->kvm_context;
559 struct kvm_run *run = env->kvm_run;
560 int fd = env->kvm_fd;
562 again:
563 if (env->kvm_vcpu_dirty) {
564 kvm_arch_load_regs(env, KVM_PUT_RUNTIME_STATE);
565 env->kvm_vcpu_dirty = 0;
567 push_nmi(kvm);
568 #if !defined(__s390__)
569 if (!kvm->irqchip_in_kernel) {
570 run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
572 #endif
574 r = pre_kvm_run(kvm, env);
575 if (r) {
576 return r;
578 if (env->exit_request) {
579 env->exit_request = 0;
580 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
582 r = ioctl(fd, KVM_RUN, 0);
584 if (r == -1 && errno != EINTR && errno != EAGAIN) {
585 r = -errno;
586 post_kvm_run(kvm, env);
587 fprintf(stderr, "kvm_run: %s\n", strerror(-r));
588 return r;
591 post_kvm_run(kvm, env);
593 kvm_flush_coalesced_mmio_buffer();
595 #if !defined(__s390__)
596 if (r == -1) {
597 r = handle_io_window(kvm);
598 goto more;
600 #endif
601 if (1) {
602 switch (run->exit_reason) {
603 case KVM_EXIT_UNKNOWN:
604 r = handle_unhandled(run->hw.hardware_exit_reason);
605 break;
606 case KVM_EXIT_FAIL_ENTRY:
607 r = handle_failed_vmentry(run->fail_entry.hardware_entry_failure_reason);
608 break;
609 case KVM_EXIT_EXCEPTION:
610 fprintf(stderr, "exception %d (%x)\n", run->ex.exception,
611 run->ex.error_code);
612 kvm_show_regs(env);
613 kvm_show_code(env);
614 abort();
615 break;
616 case KVM_EXIT_IO:
617 r = kvm_handle_io(run->io.port,
618 (uint8_t *)run + run->io.data_offset,
619 run->io.direction,
620 run->io.size,
621 run->io.count);
622 r = 0;
623 break;
624 case KVM_EXIT_DEBUG:
625 r = handle_debug(env);
626 break;
627 case KVM_EXIT_MMIO:
628 r = handle_mmio(env);
629 break;
630 case KVM_EXIT_HLT:
631 r = kvm_arch_halt(env);
632 break;
633 case KVM_EXIT_IRQ_WINDOW_OPEN:
634 break;
635 case KVM_EXIT_SHUTDOWN:
636 r = handle_shutdown(kvm, env);
637 break;
638 #if defined(__s390__)
639 case KVM_EXIT_S390_SIEIC:
640 r = kvm_s390_handle_intercept(kvm, env, run);
641 break;
642 case KVM_EXIT_S390_RESET:
643 r = kvm_s390_handle_reset(kvm, env, run);
644 break;
645 #endif
646 case KVM_EXIT_INTERNAL_ERROR:
647 kvm_handle_internal_error(env, run);
648 r = 1;
649 break;
650 default:
651 if (kvm_arch_run(env)) {
652 fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason);
653 kvm_show_regs(env);
654 abort();
656 break;
659 more:
660 if (!r) {
661 goto again;
663 return r;
666 int kvm_inject_irq(CPUState *env, unsigned irq)
668 struct kvm_interrupt intr;
670 intr.irq = irq;
671 return kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
674 int kvm_inject_nmi(CPUState *env)
676 #ifdef KVM_CAP_USER_NMI
677 return kvm_vcpu_ioctl(env, KVM_NMI);
678 #else
679 return -ENOSYS;
680 #endif
683 int kvm_init_coalesced_mmio(kvm_context_t kvm)
685 int r = 0;
686 kvm_state->coalesced_mmio = 0;
687 #ifdef KVM_CAP_COALESCED_MMIO
688 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
689 if (r > 0) {
690 kvm_state->coalesced_mmio = r;
691 return 0;
693 #endif
694 return r;
697 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
698 int kvm_assign_pci_device(kvm_context_t kvm,
699 struct kvm_assigned_pci_dev *assigned_dev)
701 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
704 static int kvm_old_assign_irq(kvm_context_t kvm,
705 struct kvm_assigned_irq *assigned_irq)
707 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
710 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
711 int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
713 int ret;
715 ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
716 if (ret > 0) {
717 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq);
720 return kvm_old_assign_irq(kvm, assigned_irq);
723 int kvm_deassign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
725 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
727 #else
728 int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
730 return kvm_old_assign_irq(kvm, assigned_irq);
732 #endif
733 #endif
735 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
736 int kvm_deassign_pci_device(kvm_context_t kvm,
737 struct kvm_assigned_pci_dev *assigned_dev)
739 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
741 #endif
743 int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
745 #ifdef KVM_CAP_REINJECT_CONTROL
746 int r;
747 struct kvm_reinject_control control;
749 control.pit_reinject = pit_reinject;
751 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
752 if (r > 0) {
753 return kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control);
755 #endif
756 return -ENOSYS;
/*
 * Return the result of checking KVM_CAP_IRQ_ROUTING, or 0 when the
 * capability is not known at build time.
 */
int kvm_has_gsi_routing(void)
{
#ifdef KVM_CAP_IRQ_ROUTING
    return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
#else
    return 0;
#endif
}
769 int kvm_get_gsi_count(kvm_context_t kvm)
771 #ifdef KVM_CAP_IRQ_ROUTING
772 return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
773 #else
774 return -EINVAL;
775 #endif
/*
 * Empty the userspace routing table by resetting its entry count.
 * Returns 0, or -EINVAL when IRQ routing is not available at build time.
 */
int kvm_clear_gsi_routes(void)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm_context->irq_routes->nr = 0;
    return 0;
#else
    return -EINVAL;
#endif
}
/*
 * Append a copy of @entry to the userspace routing table and mark its GSI
 * as used.
 *
 * The table is grown (doubling, at least 64 entries) when it is full.
 * Returns 0 on success, -ENOMEM if the table cannot be grown, or -ENOSYS
 * when IRQ routing is not available at build time.
 */
int kvm_add_routing_entry(struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm_context_t kvm = kvm_context;
    struct kvm_irq_routing_entry *slot;

    if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
        struct kvm_irq_routing *grown;
        int capacity, bytes;

        capacity = kvm->nr_allocated_irq_routes * 2;
        if (capacity < 64) {
            capacity = 64;
        }

        /* table header followed by 'capacity' entries */
        bytes = sizeof(struct kvm_irq_routing);
        bytes += capacity * sizeof(*slot);

        grown = realloc(kvm->irq_routes, bytes);
        if (!grown) {
            return -ENOMEM;
        }
        kvm->nr_allocated_irq_routes = capacity;
        kvm->irq_routes = grown;
    }

    slot = &kvm->irq_routes->entries[kvm->irq_routes->nr++];
    memset(slot, 0, sizeof(*slot));
    slot->gsi = entry->gsi;
    slot->type = entry->type;
    slot->flags = entry->flags;
    slot->u = entry->u;

    set_gsi(kvm, entry->gsi);

    return 0;
#else
    return -ENOSYS;
#endif
}
/*
 * Add an irqchip route (@irqchip, @pin) for @gsi to the userspace routing
 * table.
 *
 * The temporary entry is zeroed first: kvm_add_routing_entry() copies the
 * whole union, so the bytes not covered by the irqchip member would
 * otherwise carry indeterminate stack contents into the table.
 *
 * Returns the result of kvm_add_routing_entry(), or -ENOSYS when IRQ
 * routing is not available at build time.
 */
int kvm_add_irq_route(int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    memset(&e, 0, sizeof(e));
    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_add_routing_entry(&e);
#else
    return -ENOSYS;
#endif
}
/*
 * Remove the first routing table entry that matches @entry.
 *
 * Entries match on type and GSI plus, for irqchip routes, irqchip and pin,
 * and for MSI routes, both address halves and the data word.  The removed
 * slot is filled with the table's last entry.  If no remaining entry uses
 * the GSI, it is marked free in the GSI bitmap.
 *
 * Returns 0 when an entry was removed, -ESRCH when none matched, or
 * -ENOSYS when IRQ routing is not available at build time.
 */
int kvm_del_routing_entry(struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm_context_t kvm = kvm_context;
    struct kvm_irq_routing_entry *cur;
    int gsi = entry->gsi;
    int i, j, match;

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        cur = &kvm->irq_routes->entries[i];
        if (cur->type != entry->type || cur->gsi != gsi) {
            continue;
        }

        match = 0;
        switch (cur->type) {
        case KVM_IRQ_ROUTING_IRQCHIP:
            match = cur->u.irqchip.irqchip == entry->u.irqchip.irqchip
                    && cur->u.irqchip.pin == entry->u.irqchip.pin;
            break;
        case KVM_IRQ_ROUTING_MSI:
            match = cur->u.msi.address_lo == entry->u.msi.address_lo
                    && cur->u.msi.address_hi == entry->u.msi.address_hi
                    && cur->u.msi.data == entry->u.msi.data;
            break;
        default:
            break;
        }
        if (!match) {
            continue;
        }

        /* drop the entry by moving the last one into its place */
        --kvm->irq_routes->nr;
        *cur = kvm->irq_routes->entries[kvm->irq_routes->nr];

        /* If there are no other users of this GSI
         * mark it available in the bitmap */
        for (j = 0; j < kvm->irq_routes->nr; j++) {
            if (kvm->irq_routes->entries[j].gsi == gsi) {
                break;
            }
        }
        if (j == kvm->irq_routes->nr) {
            clear_gsi(kvm, gsi);
        }

        return 0;
    }
    return -ESRCH;
#else
    return -ENOSYS;
#endif
}
/*
 * Replace the payload of the first routing entry matching @entry with the
 * payload of @newentry.
 *
 * Both arguments must agree on GSI and type (-EINVAL otherwise).  Matching
 * uses type and GSI plus the irqchip/pin pair or the MSI address and data.
 *
 * Returns 0 when an entry was updated, -ESRCH when none matched, or
 * -ENOSYS when IRQ routing is not available at build time.
 */
int kvm_update_routing_entry(struct kvm_irq_routing_entry *entry,
                             struct kvm_irq_routing_entry *newentry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm_context_t kvm = kvm_context;
    struct kvm_irq_routing_entry *cur;
    int idx;

    if (entry->gsi != newentry->gsi || entry->type != newentry->type) {
        return -EINVAL;
    }

    for (idx = 0; idx < kvm->irq_routes->nr; ++idx) {
        cur = &kvm->irq_routes->entries[idx];
        if (cur->type != entry->type || cur->gsi != entry->gsi) {
            continue;
        }

        if (cur->type == KVM_IRQ_ROUTING_IRQCHIP) {
            if (cur->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
                cur->u.irqchip.pin == entry->u.irqchip.pin) {
                memcpy(&cur->u.irqchip, &newentry->u.irqchip,
                       sizeof cur->u.irqchip);
                return 0;
            }
        } else if (cur->type == KVM_IRQ_ROUTING_MSI) {
            if (cur->u.msi.address_lo == entry->u.msi.address_lo &&
                cur->u.msi.address_hi == entry->u.msi.address_hi &&
                cur->u.msi.data == entry->u.msi.data) {
                memcpy(&cur->u.msi, &newentry->u.msi, sizeof cur->u.msi);
                return 0;
            }
        }
    }
    return -ESRCH;
#else
    return -ENOSYS;
#endif
}
/*
 * Remove the irqchip route (@irqchip, @pin) for @gsi from the userspace
 * routing table.  Returns the result of kvm_del_routing_entry(), or
 * -ENOSYS when IRQ routing is not available at build time.
 */
int kvm_del_irq_route(int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry key;

    key.type = KVM_IRQ_ROUTING_IRQCHIP;
    key.gsi = gsi;
    key.flags = 0;
    key.u.irqchip.pin = pin;
    key.u.irqchip.irqchip = irqchip;

    return kvm_del_routing_entry(&key);
#else
    return -ENOSYS;
#endif
}
/*
 * Pass the userspace routing table to the kernel with KVM_SET_GSI_ROUTING
 * (table flags are cleared first).  Returns the ioctl result, or -ENOSYS
 * when IRQ routing is not available at build time.
 */
int kvm_commit_irq_routes(void)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing *table = kvm_context->irq_routes;

    table->flags = 0;
    return kvm_vm_ioctl(kvm_state, KVM_SET_GSI_ROUTING, table);
#else
    return -ENOSYS;
#endif
}
976 int kvm_get_irq_route_gsi(void)
978 kvm_context_t kvm = kvm_context;
979 int i, bit;
980 uint32_t *buf = kvm->used_gsi_bitmap;
982 /* Return the lowest unused GSI in the bitmap */
983 for (i = 0; i < kvm->max_gsi / 32; i++) {
984 bit = ffs(~buf[i]);
985 if (!bit) {
986 continue;
989 return bit - 1 + i * 32;
992 return -ENOSPC;
995 static void kvm_msix_routing_entry(struct kvm_irq_routing_entry *e,
996 uint32_t gsi, uint32_t addr_lo,
997 uint32_t addr_hi, uint32_t data)
1000 e->gsi = gsi;
1001 e->type = KVM_IRQ_ROUTING_MSI;
1002 e->flags = 0;
1003 e->u.msi.address_lo = addr_lo;
1004 e->u.msi.address_hi = addr_hi;
1005 e->u.msi.data = data;
1008 int kvm_add_msix(uint32_t gsi, uint32_t addr_lo,
1009 uint32_t addr_hi, uint32_t data)
1011 struct kvm_irq_routing_entry e;
1013 kvm_msix_routing_entry(&e, gsi, addr_lo, addr_hi, data);
1014 return kvm_add_routing_entry(&e);
1017 int kvm_del_msix(uint32_t gsi, uint32_t addr_lo,
1018 uint32_t addr_hi, uint32_t data)
1020 struct kvm_irq_routing_entry e;
1022 kvm_msix_routing_entry(&e, gsi, addr_lo, addr_hi, data);
1023 return kvm_del_routing_entry(&e);
1026 int kvm_update_msix(uint32_t old_gsi, uint32_t old_addr_lo,
1027 uint32_t old_addr_hi, uint32_t old_data,
1028 uint32_t new_gsi, uint32_t new_addr_lo,
1029 uint32_t new_addr_hi, uint32_t new_data)
1031 struct kvm_irq_routing_entry e1, e2;
1033 kvm_msix_routing_entry(&e1, old_gsi, old_addr_lo, old_addr_hi, old_data);
1034 kvm_msix_routing_entry(&e2, new_gsi, new_addr_lo, new_addr_hi, new_data);
1035 return kvm_update_routing_entry(&e1, &e2);
1039 #ifdef KVM_CAP_DEVICE_MSIX
1040 int kvm_assign_set_msix_nr(kvm_context_t kvm,
1041 struct kvm_assigned_msix_nr *msix_nr)
1043 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
1046 int kvm_assign_set_msix_entry(kvm_context_t kvm,
1047 struct kvm_assigned_msix_entry *entry)
1049 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
1051 #endif
1053 #if defined(KVM_CAP_IRQFD) && defined(CONFIG_EVENTFD)
1055 #include <sys/eventfd.h>
1057 static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
1059 struct kvm_irqfd data = {
1060 .fd = fd,
1061 .gsi = gsi,
1062 .flags = flags,
1065 return kvm_vm_ioctl(kvm_state, KVM_IRQFD, &data);
1068 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1070 int r;
1071 int fd;
1073 if (!kvm_check_extension(kvm_state, KVM_CAP_IRQFD))
1074 return -ENOENT;
1076 fd = eventfd(0, 0);
1077 if (fd < 0) {
1078 return -errno;
1081 r = _kvm_irqfd(kvm, fd, gsi, 0);
1082 if (r < 0) {
1083 close(fd);
1084 return -errno;
1087 return fd;
1090 #else /* KVM_CAP_IRQFD */
1092 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1094 return -ENOSYS;
1097 #endif /* KVM_CAP_IRQFD */
/* Return the result of the gettid system call for the calling thread. */
unsigned long kvm_get_thread_id(void)
{
    unsigned long tid = syscall(SYS_gettid);

    return tid;
}
1103 static void qemu_cond_wait(pthread_cond_t *cond)
1105 CPUState *env = cpu_single_env;
1107 pthread_cond_wait(cond, &qemu_mutex);
1108 cpu_single_env = env;
/* Handler installed for SIG_IPI by kvm_init_ap(); intentionally does nothing. */
static void sig_ipi_handler(int n)
{
    (void)n;
}
1115 static void sigbus_reraise(void)
1117 sigset_t set;
1118 struct sigaction action;
1120 memset(&action, 0, sizeof(action));
1121 action.sa_handler = SIG_DFL;
1122 if (!sigaction(SIGBUS, &action, NULL)) {
1123 raise(SIGBUS);
1124 sigemptyset(&set);
1125 sigaddset(&set, SIGBUS);
1126 sigprocmask(SIG_UNBLOCK, &set, NULL);
1128 perror("Failed to re-raise SIGBUS!\n");
1129 abort();
1132 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
1133 void *ctx)
1135 if (kvm_on_sigbus(siginfo->ssi_code, (void *)(intptr_t)siginfo->ssi_addr))
1136 sigbus_reraise();
1139 void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
1141 struct qemu_work_item wi;
1143 if (env == current_env) {
1144 func(data);
1145 return;
1148 wi.func = func;
1149 wi.data = data;
1150 if (!env->kvm_cpu_state.queued_work_first) {
1151 env->kvm_cpu_state.queued_work_first = &wi;
1152 } else {
1153 env->kvm_cpu_state.queued_work_last->next = &wi;
1155 env->kvm_cpu_state.queued_work_last = &wi;
1156 wi.next = NULL;
1157 wi.done = false;
1159 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1160 while (!wi.done) {
1161 qemu_cond_wait(&qemu_work_cond);
1165 static void do_kvm_cpu_synchronize_state(void *_env)
1167 CPUState *env = _env;
1169 if (!env->kvm_vcpu_dirty) {
1170 kvm_arch_save_regs(env);
1171 env->kvm_vcpu_dirty = 1;
1175 void kvm_cpu_synchronize_state(CPUState *env)
1177 if (!env->kvm_vcpu_dirty) {
1178 on_vcpu(env, do_kvm_cpu_synchronize_state, env);
1182 void kvm_cpu_synchronize_post_reset(CPUState *env)
1184 kvm_arch_load_regs(env, KVM_PUT_RESET_STATE);
1185 env->kvm_vcpu_dirty = 0;
1188 void kvm_cpu_synchronize_post_init(CPUState *env)
1190 kvm_arch_load_regs(env, KVM_PUT_FULL_STATE);
1191 env->kvm_vcpu_dirty = 0;
1194 static void inject_interrupt(void *data)
1196 cpu_interrupt(current_env, (long) data);
1199 void kvm_inject_interrupt(CPUState *env, int mask)
1201 on_vcpu(env, inject_interrupt, (void *) (long) mask);
1204 void kvm_update_interrupt_request(CPUState *env)
1206 int signal = 0;
1208 if (env) {
1209 if (!current_env || !current_env->created) {
1210 signal = 1;
1213 * Testing for created here is really redundant
1215 if (current_env && current_env->created &&
1216 env != current_env && !env->kvm_cpu_state.signalled) {
1217 signal = 1;
1220 if (signal) {
1221 env->kvm_cpu_state.signalled = 1;
1222 if (env->kvm_cpu_state.thread) {
1223 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1229 int kvm_cpu_exec(CPUState *env)
1231 int r;
1233 r = kvm_run(env);
1234 if (r < 0) {
1235 printf("kvm_run returned %d\n", r);
1236 vm_stop(0);
1239 return 0;
1242 int kvm_cpu_is_stopped(CPUState *env)
1244 return !vm_running || env->stopped;
1247 static void flush_queued_work(CPUState *env)
1249 struct qemu_work_item *wi;
1251 if (!env->kvm_cpu_state.queued_work_first) {
1252 return;
1255 while ((wi = env->kvm_cpu_state.queued_work_first)) {
1256 env->kvm_cpu_state.queued_work_first = wi->next;
1257 wi->func(wi->data);
1258 wi->done = true;
1260 env->kvm_cpu_state.queued_work_last = NULL;
1261 pthread_cond_broadcast(&qemu_work_cond);
1264 static void kvm_main_loop_wait(CPUState *env, int timeout)
1266 struct timespec ts;
1267 int r, e;
1268 siginfo_t siginfo;
1269 sigset_t waitset;
1270 sigset_t chkset;
1272 ts.tv_sec = timeout / 1000;
1273 ts.tv_nsec = (timeout % 1000) * 1000000;
1274 sigemptyset(&waitset);
1275 sigaddset(&waitset, SIG_IPI);
1276 sigaddset(&waitset, SIGBUS);
1278 do {
1279 pthread_mutex_unlock(&qemu_mutex);
1281 r = sigtimedwait(&waitset, &siginfo, &ts);
1282 e = errno;
1284 pthread_mutex_lock(&qemu_mutex);
1286 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
1287 printf("sigtimedwait: %s\n", strerror(e));
1288 exit(1);
1291 switch (r) {
1292 case SIGBUS:
1293 if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr))
1294 sigbus_reraise();
1295 break;
1296 default:
1297 break;
1300 r = sigpending(&chkset);
1301 if (r == -1) {
1302 printf("sigpending: %s\n", strerror(e));
1303 exit(1);
1305 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1307 cpu_single_env = env;
1308 flush_queued_work(env);
1310 if (env->stop) {
1311 env->stop = 0;
1312 env->stopped = 1;
1313 pthread_cond_signal(&qemu_pause_cond);
1316 env->kvm_cpu_state.signalled = 0;
1319 static int all_threads_paused(void)
1321 CPUState *penv = first_cpu;
1323 while (penv) {
1324 if (penv->stop) {
1325 return 0;
1327 penv = (CPUState *) penv->next_cpu;
1330 return 1;
1333 static void pause_all_threads(void)
1335 CPUState *penv = first_cpu;
1337 while (penv) {
1338 if (penv != cpu_single_env) {
1339 penv->stop = 1;
1340 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1341 } else {
1342 penv->stop = 0;
1343 penv->stopped = 1;
1344 cpu_exit(penv);
1346 penv = (CPUState *) penv->next_cpu;
1349 while (!all_threads_paused()) {
1350 qemu_cond_wait(&qemu_pause_cond);
1354 static void resume_all_threads(void)
1356 CPUState *penv = first_cpu;
1358 assert(!cpu_single_env);
1360 while (penv) {
1361 penv->stop = 0;
1362 penv->stopped = 0;
1363 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1364 penv = (CPUState *) penv->next_cpu;
/*
 * VM state change callback: resume all vcpu threads when the VM starts
 * running, pause them otherwise.  @context and @reason are unused.
 */
static void kvm_vm_state_change_handler(void *context, int running, int reason)
{
    if (!running) {
        pause_all_threads();
        return;
    }

    resume_all_threads();
}
1377 static void setup_kernel_sigmask(CPUState *env)
1379 sigset_t set;
1381 sigemptyset(&set);
1382 sigaddset(&set, SIGUSR2);
1383 sigaddset(&set, SIGIO);
1384 sigaddset(&set, SIGALRM);
1385 sigprocmask(SIG_BLOCK, &set, NULL);
1387 sigprocmask(SIG_BLOCK, NULL, &set);
1388 sigdelset(&set, SIG_IPI);
1389 sigdelset(&set, SIGBUS);
1391 kvm_set_signal_mask(env, &set);
/* Reset the system with all vcpu threads paused for the duration. */
static void qemu_kvm_system_reset(void)
{
    pause_all_threads();
    qemu_system_reset();
    resume_all_threads();
}
1403 static void process_irqchip_events(CPUState *env)
1405 kvm_arch_process_irqchip_events(env);
1406 if (kvm_arch_has_work(env))
1407 env->halted = 0;
1410 static int kvm_main_loop_cpu(CPUState *env)
1412 while (1) {
1413 int run_cpu = !kvm_cpu_is_stopped(env);
1414 if (run_cpu && !kvm_irqchip_in_kernel()) {
1415 process_irqchip_events(env);
1416 run_cpu = !env->halted;
1418 if (run_cpu) {
1419 kvm_cpu_exec(env);
1420 kvm_main_loop_wait(env, 0);
1421 } else {
1422 kvm_main_loop_wait(env, 1000);
1425 pthread_mutex_unlock(&qemu_mutex);
1426 return 0;
1429 static void *ap_main_loop(void *_env)
1431 CPUState *env = _env;
1432 sigset_t signals;
1433 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1434 struct ioperm_data *data = NULL;
1435 #endif
1437 current_env = env;
1438 env->thread_id = kvm_get_thread_id();
1439 sigfillset(&signals);
1440 sigprocmask(SIG_BLOCK, &signals, NULL);
1442 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1443 /* do ioperm for io ports of assigned devices */
1444 QLIST_FOREACH(data, &ioperm_head, entries)
1445 on_vcpu(env, kvm_arch_do_ioperm, data);
1446 #endif
1448 pthread_mutex_lock(&qemu_mutex);
1449 cpu_single_env = env;
1451 kvm_create_vcpu(env, env->cpu_index);
1452 setup_kernel_sigmask(env);
1454 /* signal VCPU creation */
1455 current_env->created = 1;
1456 pthread_cond_signal(&qemu_vcpu_cond);
1458 /* and wait for machine initialization */
1459 while (!qemu_system_ready) {
1460 qemu_cond_wait(&qemu_system_cond);
1463 /* re-initialize cpu_single_env after re-acquiring qemu_mutex */
1464 cpu_single_env = env;
1466 kvm_main_loop_cpu(env);
1467 return NULL;
1470 int kvm_init_vcpu(CPUState *env)
1472 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
1474 while (env->created == 0) {
1475 qemu_cond_wait(&qemu_vcpu_cond);
1478 return 0;
1481 int kvm_vcpu_inited(CPUState *env)
1483 return env->created;
1486 #ifdef TARGET_I386
1487 void kvm_hpet_disable_kpit(void)
1489 struct kvm_pit_state2 ps2;
1491 kvm_get_pit2(kvm_context, &ps2);
1492 ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
1493 kvm_set_pit2(kvm_context, &ps2);
1496 void kvm_hpet_enable_kpit(void)
1498 struct kvm_pit_state2 ps2;
1500 kvm_get_pit2(kvm_context, &ps2);
1501 ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY;
1502 kvm_set_pit2(kvm_context, &ps2);
1504 #endif
1506 int kvm_init_ap(void)
1508 struct sigaction action;
1510 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
1512 signal(SIG_IPI, sig_ipi_handler);
1514 memset(&action, 0, sizeof(action));
1515 action.sa_flags = SA_SIGINFO;
1516 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
1517 sigaction(SIGBUS, &action, NULL);
1518 prctl(PR_MCE_KILL, 1, 1, 0, 0);
1519 return 0;
1522 /* If we have signalfd, we mask out the signals we want to handle and then
1523 * use signalfd to listen for them. We rely on whatever the current signal
1524 * handler is to dispatch the signals when we receive them.
1527 static void sigfd_handler(void *opaque)
1529 int fd = (unsigned long) opaque;
1530 struct qemu_signalfd_siginfo info;
1531 struct sigaction action;
1532 ssize_t len;
1534 while (1) {
1535 do {
1536 len = read(fd, &info, sizeof(info));
1537 } while (len == -1 && errno == EINTR);
1539 if (len == -1 && errno == EAGAIN) {
1540 break;
1543 if (len != sizeof(info)) {
1544 printf("read from sigfd returned %zd: %m\n", len);
1545 return;
1548 sigaction(info.ssi_signo, NULL, &action);
1549 if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
1550 action.sa_sigaction(info.ssi_signo,
1551 (siginfo_t *)&info, NULL);
1552 } else if (action.sa_handler) {
1553 action.sa_handler(info.ssi_signo);
1558 int kvm_main_loop(void)
1560 sigset_t mask;
1561 int sigfd;
1563 io_thread = pthread_self();
1564 qemu_system_ready = 1;
1566 sigemptyset(&mask);
1567 sigaddset(&mask, SIGIO);
1568 sigaddset(&mask, SIGALRM);
1569 sigaddset(&mask, SIGBUS);
1570 sigprocmask(SIG_BLOCK, &mask, NULL);
1572 sigfd = qemu_signalfd(&mask);
1573 if (sigfd == -1) {
1574 fprintf(stderr, "failed to create signalfd\n");
1575 return -errno;
1578 fcntl(sigfd, F_SETFL, O_NONBLOCK);
1580 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
1581 (void *)(unsigned long) sigfd);
1583 pthread_cond_broadcast(&qemu_system_cond);
1585 io_thread_sigfd = sigfd;
1586 cpu_single_env = NULL;
1588 while (1) {
1589 main_loop_wait(0);
1590 if (qemu_shutdown_requested()) {
1591 monitor_protocol_event(QEVENT_SHUTDOWN, NULL);
1592 if (qemu_no_shutdown()) {
1593 vm_stop(0);
1594 } else {
1595 break;
1597 } else if (qemu_powerdown_requested()) {
1598 monitor_protocol_event(QEVENT_POWERDOWN, NULL);
1599 qemu_irq_raise(qemu_system_powerdown);
1600 } else if (qemu_reset_requested()) {
1601 qemu_kvm_system_reset();
1602 } else if (kvm_debug_cpu_requested) {
1603 gdb_set_stop_cpu(kvm_debug_cpu_requested);
1604 vm_stop(EXCP_DEBUG);
1605 kvm_debug_cpu_requested = NULL;
1609 pause_all_threads();
1610 pthread_mutex_unlock(&qemu_mutex);
1612 return 0;
1615 #if !defined(TARGET_I386)
/*
 * Stub used when the target is not TARGET_I386: there is nothing to set up,
 * so it always reports success (0).
 */
int kvm_arch_init_irq_routing(void)
{
    return 0;
}
1620 #endif
1622 extern int no_hpet;
1624 static int kvm_create_context(void)
1626 static const char upgrade_note[] =
1627 "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
1628 "(see http://sourceforge.net/projects/kvm).\n";
1630 int r;
1632 if (!kvm_irqchip) {
1633 kvm_disable_irqchip_creation(kvm_context);
1635 if (!kvm_pit) {
1636 kvm_disable_pit_creation(kvm_context);
1638 if (kvm_create(kvm_context, 0, NULL) < 0) {
1639 kvm_finalize(kvm_state);
1640 return -1;
1642 r = kvm_arch_qemu_create_context();
1643 if (r < 0) {
1644 kvm_finalize(kvm_state);
1645 return -1;
1647 if (kvm_pit && !kvm_pit_reinject) {
1648 if (kvm_reinject_control(kvm_context, 0)) {
1649 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
1650 return -1;
1654 /* There was a nasty bug in < kvm-80 that prevents memory slots from being
1655 * destroyed properly. Since we rely on this capability, refuse to work
1656 * with any kernel without this capability. */
1657 if (!kvm_check_extension(kvm_state, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
1658 fprintf(stderr,
1659 "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
1660 upgrade_note);
1661 return -EINVAL;
1664 r = kvm_arch_init_irq_routing();
1665 if (r < 0) {
1666 return r;
1669 kvm_state->vcpu_events = 0;
1670 #ifdef KVM_CAP_VCPU_EVENTS
1671 kvm_state->vcpu_events = kvm_check_extension(kvm_state, KVM_CAP_VCPU_EVENTS);
1672 #endif
1674 kvm_state->debugregs = 0;
1675 #ifdef KVM_CAP_DEBUGREGS
1676 kvm_state->debugregs = kvm_check_extension(kvm_state, KVM_CAP_DEBUGREGS);
1677 #endif
1679 kvm_state->xsave = 0;
1680 #ifdef KVM_CAP_XSAVE
1681 kvm_state->xsave = kvm_check_extension(kvm_state, KVM_CAP_XSAVE);
1682 #endif
1684 kvm_state->xcrs = 0;
1685 #ifdef KVM_CAP_XCRS
1686 kvm_state->xcrs = kvm_check_extension(kvm_state, KVM_CAP_XCRS);
1687 #endif
1689 kvm_init_ap();
1690 if (kvm_irqchip) {
1691 if (!qemu_kvm_has_gsi_routing()) {
1692 irq0override = 0;
1693 #ifdef TARGET_I386
1694 /* if kernel can't do irq routing, interrupt source
1695 * override 0->2 can not be set up as required by hpet,
1696 * so disable hpet.
1698 no_hpet = 1;
1699 } else if (!qemu_kvm_has_pit_state2()) {
1700 no_hpet = 1;
1702 #else
1704 #endif
1707 return 0;
1710 #ifdef KVM_CAP_IRQCHIP
1712 int kvm_set_irq(int irq, int level, int *status)
1714 return kvm_set_irq_level(kvm_context, irq, level, status);
1717 #endif
1719 static void kvm_mutex_unlock(void)
1721 assert(!cpu_single_env);
1722 pthread_mutex_unlock(&qemu_mutex);
1725 static void kvm_mutex_lock(void)
1727 pthread_mutex_lock(&qemu_mutex);
1728 cpu_single_env = NULL;
/*
 * Release the global qemu_mutex via kvm_mutex_unlock().  Does nothing when
 * KVM is not enabled.
 */
void qemu_mutex_unlock_iothread(void)
{
    if (!kvm_enabled()) {
        return;
    }

    kvm_mutex_unlock();
}
/*
 * Acquire the global qemu_mutex via kvm_mutex_lock().  Does nothing when
 * KVM is not enabled.
 */
void qemu_mutex_lock_iothread(void)
{
    if (!kvm_enabled()) {
        return;
    }

    kvm_mutex_lock();
}
1745 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1746 void kvm_add_ioperm_data(struct ioperm_data *data)
1748 QLIST_INSERT_HEAD(&ioperm_head, data, entries);
1751 void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
1753 struct ioperm_data *data;
1755 data = QLIST_FIRST(&ioperm_head);
1756 while (data) {
1757 struct ioperm_data *next = QLIST_NEXT(data, entries);
1759 if (data->start_port == start_port && data->num == num) {
1760 QLIST_REMOVE(data, entries);
1761 qemu_free(data);
1764 data = next;
1768 void kvm_ioperm(CPUState *env, void *data)
1770 if (kvm_enabled() && qemu_system_ready) {
1771 on_vcpu(env, kvm_arch_do_ioperm, data);
1775 #endif
1777 int kvm_set_boot_cpu_id(uint32_t id)
1779 return kvm_set_boot_vcpu_id(kvm_context, id);