qemu-kvm: Process exit requests in kvm loop
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
20 #include "monitor.h"
22 #include "qemu-kvm.h"
23 #include "libkvm.h"
25 #include <pthread.h>
26 #include <sys/utsname.h>
27 #include <sys/syscall.h>
28 #include <sys/mman.h>
29 #include <sys/ioctl.h>
30 #include "compatfd.h"
31 #include <sys/prctl.h>
33 #define false 0
34 #define true 1
36 #ifndef PR_MCE_KILL
37 #define PR_MCE_KILL 33
38 #endif
40 #ifndef BUS_MCEERR_AR
41 #define BUS_MCEERR_AR 4
42 #endif
43 #ifndef BUS_MCEERR_AO
44 #define BUS_MCEERR_AO 5
45 #endif
47 #define EXPECTED_KVM_API_VERSION 12
49 #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
50 #error libkvm: userspace and kernel version mismatch
51 #endif
53 int kvm_irqchip = 1;
54 int kvm_pit = 1;
55 int kvm_pit_reinject = 1;
56 int kvm_nested = 0;
59 KVMState *kvm_state;
60 kvm_context_t kvm_context;
62 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
63 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
64 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
65 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
66 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
67 __thread CPUState *current_env;
69 static int qemu_system_ready;
71 #define SIG_IPI (SIGRTMIN+4)
73 pthread_t io_thread;
74 static int io_thread_fd = -1;
75 static int io_thread_sigfd = -1;
77 static CPUState *kvm_debug_cpu_requested;
79 static uint64_t phys_ram_size;
81 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
82 /* The list of ioperm_data */
83 static QLIST_HEAD(, ioperm_data) ioperm_head;
84 #endif
86 //#define DEBUG_MEMREG
87 #ifdef DEBUG_MEMREG
88 #define DPRINTF(fmt, args...) \
89 do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0)
90 #else
91 #define DPRINTF(fmt, args...) do {} while (0)
92 #endif
94 #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
96 int kvm_abi = EXPECTED_KVM_API_VERSION;
97 int kvm_page_size;
99 #ifdef KVM_CAP_SET_GUEST_DEBUG
100 static int kvm_debug(CPUState *env,
101 struct kvm_debug_exit_arch *arch_info)
103 int handle = kvm_arch_debug(arch_info);
105 if (handle) {
106 kvm_debug_cpu_requested = env;
107 env->stopped = 1;
109 return handle;
111 #endif
113 static int handle_unhandled(uint64_t reason)
115 fprintf(stderr, "kvm: unhandled exit %" PRIx64 "\n", reason);
116 return -EINVAL;
120 static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
122 uint32_t *bitmap = kvm->used_gsi_bitmap;
124 if (gsi < kvm->max_gsi)
125 bitmap[gsi / 32] |= 1U << (gsi % 32);
126 else
127 DPRINTF("Invalid GSI %u\n", gsi);
130 static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
132 uint32_t *bitmap = kvm->used_gsi_bitmap;
134 if (gsi < kvm->max_gsi)
135 bitmap[gsi / 32] &= ~(1U << (gsi % 32));
136 else
137 DPRINTF("Invalid GSI %u\n", gsi);
140 struct slot_info {
141 unsigned long phys_addr;
142 unsigned long len;
143 unsigned long userspace_addr;
144 unsigned flags;
145 int logging_count;
148 struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
150 static void init_slots(void)
152 int i;
154 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
155 slots[i].len = 0;
158 static int get_free_slot(kvm_context_t kvm)
160 int i;
161 int tss_ext;
163 #if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
164 tss_ext = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
165 #else
166 tss_ext = 0;
167 #endif
170 * on older kernels where the set tss ioctl is not supported we must save
171 * slot 0 to hold the extended memory, as the vmx will use the last 3
172 * pages of this slot.
174 if (tss_ext > 0)
175 i = 0;
176 else
177 i = 1;
179 for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
180 if (!slots[i].len)
181 return i;
182 return -1;
185 static void register_slot(int slot, unsigned long phys_addr,
186 unsigned long len, unsigned long userspace_addr,
187 unsigned flags)
189 slots[slot].phys_addr = phys_addr;
190 slots[slot].len = len;
191 slots[slot].userspace_addr = userspace_addr;
192 slots[slot].flags = flags;
195 static void free_slot(int slot)
197 slots[slot].len = 0;
198 slots[slot].logging_count = 0;
201 static int get_slot(unsigned long phys_addr)
203 int i;
205 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
206 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
207 (slots[i].phys_addr + slots[i].len - 1) >= phys_addr)
208 return i;
210 return -1;
213 /* Returns -1 if this slot is not totally contained on any other,
214 * and the number of the slot otherwise */
215 static int get_container_slot(uint64_t phys_addr, unsigned long size)
217 int i;
219 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
220 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
221 (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
222 return i;
223 return -1;
226 int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr,
227 unsigned long size)
229 int slot = get_container_slot(phys_addr, size);
230 if (slot == -1)
231 return 0;
232 return 1;
236 * dirty pages logging control
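/*
 * Recompute the slot's flags as (old_flags & ~mask) | flags and, if they
 * changed, re-issue KVM_SET_USER_MEMORY_REGION for that slot so the kernel
 * picks up the new dirty-logging setting.
 */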
238 static int kvm_dirty_pages_log_change(kvm_context_t kvm,
239 unsigned long phys_addr, unsigned flags,
240 unsigned mask)
242 int r = -1;
243 int slot = get_slot(phys_addr);
245 if (slot == -1) {
246 fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
247 return 1;
250 flags = (slots[slot].flags & ~mask) | flags;
251 if (flags == slots[slot].flags)
252 return 0;
253 slots[slot].flags = flags;
256 struct kvm_userspace_memory_region mem = {
257 .slot = slot,
258 .memory_size = slots[slot].len,
259 .guest_phys_addr = slots[slot].phys_addr,
260 .userspace_addr = slots[slot].userspace_addr,
261 .flags = slots[slot].flags,
265 DPRINTF("slot %d start %llx len %llx flags %x\n",
266 mem.slot, mem.guest_phys_addr, mem.memory_size, mem.flags);
267 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &mem);
268 if (r < 0)
269 fprintf(stderr, "%s: %m\n", __FUNCTION__);
271 return r;
274 static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
275 int (*change)(kvm_context_t kvm,
276 uint64_t start,
277 uint64_t len))
279 int i, r;
281 for (i = r = 0; i < KVM_MAX_NUM_MEM_REGIONS && r == 0; i++) {
282 if (slots[i].len)
283 r = change(kvm, slots[i].phys_addr, slots[i].len);
285 return r;
288 int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm, uint64_t phys_addr,
289 uint64_t len)
291 int slot = get_slot(phys_addr);
293 DPRINTF("start %" PRIx64 " len %" PRIx64 "\n", phys_addr, len);
294 if (slot == -1) {
295 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
296 return -EINVAL;
299 if (slots[slot].logging_count++)
300 return 0;
302 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
303 KVM_MEM_LOG_DIRTY_PAGES,
304 KVM_MEM_LOG_DIRTY_PAGES);
307 int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm, uint64_t phys_addr,
308 uint64_t len)
310 int slot = get_slot(phys_addr);
312 if (slot == -1) {
313 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
314 return -EINVAL;
317 if (--slots[slot].logging_count)
318 return 0;
320 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr, 0,
321 KVM_MEM_LOG_DIRTY_PAGES);
325 * Enable dirty page logging for all memory regions
327 int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
329 if (kvm->dirty_pages_log_all)
330 return 0;
331 kvm->dirty_pages_log_all = 1;
332 return kvm_dirty_pages_log_change_all(kvm, kvm_dirty_pages_log_enable_slot);
336 * Enable dirty page logging only for memory regions that were created with
337 * dirty logging enabled (disable for all other memory regions).
339 int kvm_dirty_pages_log_reset(kvm_context_t kvm)
341 if (!kvm->dirty_pages_log_all)
342 return 0;
343 kvm->dirty_pages_log_all = 0;
344 return kvm_dirty_pages_log_change_all(kvm,
345 kvm_dirty_pages_log_disable_slot);
349 static int kvm_create_context(void);
351 int kvm_init(int smp_cpus)
353 int fd;
354 int r, gsi_count;
357 fd = open("/dev/kvm", O_RDWR);
358 if (fd == -1) {
359 perror("open /dev/kvm");
360 return -1;
362 r = ioctl(fd, KVM_GET_API_VERSION, 0);
363 if (r == -1) {
364 fprintf(stderr,
365 "kvm kernel version too old: "
366 "KVM_GET_API_VERSION ioctl not supported\n");
367 goto out_close;
369 if (r < EXPECTED_KVM_API_VERSION) {
370 fprintf(stderr, "kvm kernel version too old: "
371 "We expect API version %d or newer, but got "
372 "version %d\n", EXPECTED_KVM_API_VERSION, r);
373 goto out_close;
375 if (r > EXPECTED_KVM_API_VERSION) {
376 fprintf(stderr, "kvm userspace version too old\n");
377 goto out_close;
379 kvm_abi = r;
380 kvm_page_size = getpagesize();
381 kvm_state = qemu_mallocz(sizeof(*kvm_state));
382 kvm_context = &kvm_state->kvm_context;
384 kvm_state->fd = fd;
385 kvm_state->vmfd = -1;
386 kvm_context->opaque = cpu_single_env;
387 kvm_context->dirty_pages_log_all = 0;
388 kvm_context->no_irqchip_creation = 0;
389 kvm_context->no_pit_creation = 0;
391 #ifdef KVM_CAP_SET_GUEST_DEBUG
392 QTAILQ_INIT(&kvm_state->kvm_sw_breakpoints);
393 #endif
395 gsi_count = kvm_get_gsi_count(kvm_context);
396 if (gsi_count > 0) {
397 int gsi_bits, i;
399 /* Round up so we can search ints using ffs */
400 gsi_bits = ALIGN(gsi_count, 32);
401 kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
402 kvm_context->max_gsi = gsi_bits;
404 /* Mark any over-allocated bits as already in use */
405 for (i = gsi_count; i < gsi_bits; i++)
406 set_gsi(kvm_context, i);
409 kvm_cpu_register_phys_memory_client();
411 pthread_mutex_lock(&qemu_mutex);
412 return kvm_create_context();
414 out_close:
415 close(fd);
416 return -1;
419 static void kvm_finalize(KVMState *s)
421 /* FIXME
422 if (kvm->vcpu_fd[0] != -1)
423 close(kvm->vcpu_fd[0]);
424 if (kvm->vm_fd != -1)
425 close(kvm->vm_fd);
427 close(s->fd);
428 free(s);
431 void kvm_disable_irqchip_creation(kvm_context_t kvm)
433 kvm->no_irqchip_creation = 1;
436 void kvm_disable_pit_creation(kvm_context_t kvm)
438 kvm->no_pit_creation = 1;
441 static void kvm_reset_vcpu(void *opaque)
443 CPUState *env = opaque;
445 kvm_arch_cpu_reset(env);
448 static void kvm_create_vcpu(CPUState *env, int id)
450 long mmap_size;
451 int r;
452 KVMState *s = kvm_state;
454 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
455 if (r < 0) {
456 fprintf(stderr, "kvm_create_vcpu: %m\n");
457 fprintf(stderr, "Failed to create vCPU. Check the -smp parameter.\n");
458 goto err;
461 env->kvm_fd = r;
462 env->kvm_state = kvm_state;
464 mmap_size = kvm_ioctl(kvm_state, KVM_GET_VCPU_MMAP_SIZE, 0);
465 if (mmap_size < 0) {
466 fprintf(stderr, "get vcpu mmap size: %m\n");
467 goto err_fd;
469 env->kvm_run =
470 mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, env->kvm_fd,
472 if (env->kvm_run == MAP_FAILED) {
473 fprintf(stderr, "mmap vcpu area: %m\n");
474 goto err_fd;
477 #ifdef KVM_CAP_COALESCED_MMIO
478 if (s->coalesced_mmio && !s->coalesced_mmio_ring)
479 s->coalesced_mmio_ring = (void *) env->kvm_run +
480 s->coalesced_mmio * PAGE_SIZE;
481 #endif
483 r = kvm_arch_init_vcpu(env);
484 if (r == 0) {
485 qemu_register_reset(kvm_reset_vcpu, env);
488 return;
489 err_fd:
490 close(env->kvm_fd);
491 err:
492 /* We're no good with semi-broken states. */
493 abort();
496 static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
498 #ifdef KVM_CAP_SET_BOOT_CPU_ID
499 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
500 if (r > 0)
501 return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id);
502 return -ENOSYS;
503 #else
504 return -ENOSYS;
505 #endif
508 int kvm_create_vm(kvm_context_t kvm)
510 int fd;
511 #ifdef KVM_CAP_IRQ_ROUTING
512 kvm->irq_routes = qemu_mallocz(sizeof(*kvm->irq_routes));
513 kvm->nr_allocated_irq_routes = 0;
514 #endif
516 fd = kvm_ioctl(kvm_state, KVM_CREATE_VM, 0);
517 if (fd < 0) {
518 fprintf(stderr, "kvm_create_vm: %m\n");
519 return -1;
521 kvm_state->vmfd = fd;
522 return 0;
525 static int kvm_create_default_phys_mem(kvm_context_t kvm,
526 unsigned long phys_mem_bytes,
527 void **vm_mem)
529 #ifdef KVM_CAP_USER_MEMORY
530 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
531 if (r > 0)
532 return 0;
533 fprintf(stderr,
534 "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
535 #else
536 #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
537 #endif
538 return -1;
541 void kvm_create_irqchip(kvm_context_t kvm)
543 int r;
545 kvm->irqchip_in_kernel = 0;
546 #ifdef KVM_CAP_IRQCHIP
547 if (!kvm->no_irqchip_creation) {
548 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
549 if (r > 0) { /* kernel irqchip supported */
550 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_IRQCHIP);
551 if (r >= 0) {
552 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
553 #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
554 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
555 KVM_CAP_IRQ_INJECT_STATUS);
556 if (r > 0)
557 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
558 #endif
559 kvm->irqchip_in_kernel = 1;
560 } else
561 fprintf(stderr, "Creating kernel irqchip failed\n");
564 #endif
565 kvm_state->irqchip_in_kernel = kvm->irqchip_in_kernel;
568 int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
570 int r;
572 r = kvm_create_vm(kvm);
573 if (r < 0)
574 return r;
575 r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
576 if (r < 0)
577 return r;
578 init_slots();
579 r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
580 if (r < 0)
581 return r;
582 kvm_create_irqchip(kvm);
584 return 0;
588 int kvm_register_phys_mem(kvm_context_t kvm,
589 unsigned long phys_start, void *userspace_addr,
590 unsigned long len, int log)
593 struct kvm_userspace_memory_region memory = {
594 .memory_size = len,
595 .guest_phys_addr = phys_start,
596 .userspace_addr = (unsigned long) (uintptr_t) userspace_addr,
597 .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
599 int r;
601 memory.slot = get_free_slot(kvm);
602 DPRINTF
603 ("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %x\n",
604 memory.guest_phys_addr, memory.memory_size, memory.userspace_addr,
605 memory.slot, memory.flags);
606 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
607 if (r < 0) {
608 fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(-r));
609 return -1;
611 register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
612 memory.userspace_addr, memory.flags);
613 return 0;
617 /* destroy/free a whole slot.
618 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
620 void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
621 unsigned long len)
623 int slot;
624 int r;
625 struct kvm_userspace_memory_region memory = {
626 .memory_size = 0,
627 .guest_phys_addr = phys_start,
628 .userspace_addr = 0,
629 .flags = 0,
632 slot = get_slot(phys_start);
634 if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
635 fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n", __FUNCTION__,
636 slot);
637 return;
639 if (phys_start != slots[slot].phys_addr) {
640 fprintf(stderr,
641 "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
642 __FUNCTION__, phys_start, slots[slot].phys_addr);
643 phys_start = slots[slot].phys_addr;
646 memory.slot = slot;
647 DPRINTF("slot %d start %llx len %llx flags %x\n",
648 memory.slot, memory.guest_phys_addr, memory.memory_size,
649 memory.flags);
650 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
651 if (r < 0) {
652 fprintf(stderr, "destroy_userspace_phys_mem: %s\n", strerror(-r));
653 return;
656 free_slot(memory.slot);
659 void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr,
660 unsigned long size)
663 int slot = get_container_slot(phys_addr, size);
665 if (slot != -1) {
666 DPRINTF("Unregistering memory region %" PRIx64 " (%lx)\n", phys_addr, size);
667 kvm_destroy_phys_mem(kvm, phys_addr, size);
668 return;
672 static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
674 int r;
675 struct kvm_dirty_log log = {
676 .slot = slot,
679 log.dirty_bitmap = buf;
681 r = kvm_vm_ioctl(kvm_state, ioctl_num, &log);
682 if (r < 0)
683 return r;
684 return 0;
687 int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
689 int slot;
691 slot = get_slot(phys_addr);
692 return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
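/*
 * Walk all slots fully contained in [phys_addr, phys_addr + len), fetch each
 * slot's dirty bitmap with KVM_GET_DIRTY_LOG into a temporary buffer and hand
 * it to the callback; stop on the first non-zero return.
 */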
695 int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
696 unsigned long len, void *opaque,
697 int (*cb)(unsigned long start,
698 unsigned long len, void *bitmap,
699 void *opaque))
701 int i;
702 int r;
703 unsigned long end_addr = phys_addr + len;
704 void *buf;
706 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
707 if ((slots[i].len && (uint64_t) slots[i].phys_addr >= phys_addr)
708 && ((uint64_t) slots[i].phys_addr + slots[i].len <= end_addr)) {
709 buf = qemu_malloc(BITMAP_SIZE(slots[i].len));
710 r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
711 if (r) {
712 qemu_free(buf);
713 return r;
715 r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
716 qemu_free(buf);
717 if (r)
718 return r;
721 return 0;
724 #ifdef KVM_CAP_IRQCHIP
726 int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
728 struct kvm_irq_level event;
729 int r;
731 if (!kvm->irqchip_in_kernel)
732 return 0;
733 event.level = level;
734 event.irq = irq;
735 r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event);
736 if (r < 0)
737 perror("kvm_set_irq_level");
739 if (status) {
740 #ifdef KVM_CAP_IRQ_INJECT_STATUS
741 *status =
742 (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
743 #else
744 *status = 1;
745 #endif
748 return 1;
751 int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
753 int r;
755 if (!kvm->irqchip_in_kernel)
756 return 0;
757 r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
758 if (r < 0) {
759 perror("kvm_get_irqchip\n");
761 return r;
764 int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
766 int r;
768 if (!kvm->irqchip_in_kernel)
769 return 0;
770 r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
771 if (r < 0) {
772 perror("kvm_set_irqchip\n");
774 return r;
777 #endif
779 static int handle_debug(CPUState *env)
781 #ifdef KVM_CAP_SET_GUEST_DEBUG
782 struct kvm_run *run = env->kvm_run;
784 return kvm_debug(env, &run->debug.arch);
785 #else
786 return 0;
787 #endif
790 int kvm_get_regs(CPUState *env, struct kvm_regs *regs)
792 return kvm_vcpu_ioctl(env, KVM_GET_REGS, regs);
795 int kvm_set_regs(CPUState *env, struct kvm_regs *regs)
797 return kvm_vcpu_ioctl(env, KVM_SET_REGS, regs);
800 int kvm_get_fpu(CPUState *env, struct kvm_fpu *fpu)
802 return kvm_vcpu_ioctl(env, KVM_GET_FPU, fpu);
805 int kvm_set_fpu(CPUState *env, struct kvm_fpu *fpu)
807 return kvm_vcpu_ioctl(env, KVM_SET_FPU, fpu);
810 int kvm_get_sregs(CPUState *env, struct kvm_sregs *sregs)
812 return kvm_vcpu_ioctl(env, KVM_GET_SREGS, sregs);
815 int kvm_set_sregs(CPUState *env, struct kvm_sregs *sregs)
817 return kvm_vcpu_ioctl(env, KVM_SET_SREGS, sregs);
820 #ifdef KVM_CAP_MP_STATE
821 int kvm_get_mpstate(CPUState *env, struct kvm_mp_state *mp_state)
823 int r;
825 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
826 if (r > 0)
827 return kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, mp_state);
828 return -ENOSYS;
831 int kvm_set_mpstate(CPUState *env, struct kvm_mp_state *mp_state)
833 int r;
835 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
836 if (r > 0)
837 return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, mp_state);
838 return -ENOSYS;
840 #endif
842 static int handle_mmio(CPUState *env)
844 unsigned long addr = env->kvm_run->mmio.phys_addr;
845 struct kvm_run *kvm_run = env->kvm_run;
846 void *data = kvm_run->mmio.data;
848 /* hack: Red Hat 7.1 generates these weird accesses. */
849 if ((addr > 0xa0000 - 4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
850 return 0;
852 cpu_physical_memory_rw(addr, data, kvm_run->mmio.len, kvm_run->mmio.is_write);
853 return 0;
856 int handle_io_window(kvm_context_t kvm)
858 return 1;
861 int handle_shutdown(kvm_context_t kvm, CPUState *env)
863 /* stop the current vcpu from going back to guest mode */
864 env->stopped = 1;
866 qemu_system_reset_request();
867 return 1;
870 static inline void push_nmi(kvm_context_t kvm)
872 #ifdef KVM_CAP_USER_NMI
873 kvm_arch_push_nmi(kvm->opaque);
874 #endif /* KVM_CAP_USER_NMI */
877 void post_kvm_run(kvm_context_t kvm, CPUState *env)
879 pthread_mutex_lock(&qemu_mutex);
880 kvm_arch_post_run(env, env->kvm_run);
881 cpu_single_env = env;
884 int pre_kvm_run(kvm_context_t kvm, CPUState *env)
886 kvm_arch_pre_run(env, env->kvm_run);
888 if (env->kvm_vcpu_dirty) {
889 kvm_arch_load_regs(env, KVM_PUT_RUNTIME_STATE);
890 env->kvm_vcpu_dirty = 0;
893 pthread_mutex_unlock(&qemu_mutex);
894 return 0;
897 int kvm_is_ready_for_interrupt_injection(CPUState *env)
899 return env->kvm_run->ready_for_interrupt_injection;
902 int kvm_run(CPUState *env)
904 int r;
905 kvm_context_t kvm = &env->kvm_state->kvm_context;
906 struct kvm_run *run = env->kvm_run;
907 int fd = env->kvm_fd;
909 again:
910 push_nmi(kvm);
911 #if !defined(__s390__)
912 if (!kvm->irqchip_in_kernel)
913 run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
914 #endif
916 r = pre_kvm_run(kvm, env);
917 if (r)
918 return r;
919 r = ioctl(fd, KVM_RUN, 0);
921 if (r == -1 && errno != EINTR && errno != EAGAIN) {
922 r = -errno;
923 post_kvm_run(kvm, env);
924 fprintf(stderr, "kvm_run: %s\n", strerror(-r));
925 return r;
928 post_kvm_run(kvm, env);
930 kvm_flush_coalesced_mmio_buffer();
932 #if !defined(__s390__)
933 if (r == -1) {
934 r = handle_io_window(kvm);
935 goto more;
937 #endif
938 if (1) {
939 switch (run->exit_reason) {
940 case KVM_EXIT_UNKNOWN:
941 r = handle_unhandled(run->hw.hardware_exit_reason);
942 break;
943 case KVM_EXIT_FAIL_ENTRY:
944 r = handle_unhandled(run->fail_entry.hardware_entry_failure_reason);
945 break;
946 case KVM_EXIT_EXCEPTION:
947 fprintf(stderr, "exception %d (%x)\n", run->ex.exception,
948 run->ex.error_code);
949 kvm_show_regs(env);
950 kvm_show_code(env);
951 abort();
952 break;
953 case KVM_EXIT_IO:
954 r = kvm_handle_io(run->io.port,
955 (uint8_t *)run + run->io.data_offset,
956 run->io.direction,
957 run->io.size,
958 run->io.count);
959 r = 0;
960 break;
961 case KVM_EXIT_DEBUG:
962 r = handle_debug(env);
963 break;
964 case KVM_EXIT_MMIO:
965 r = handle_mmio(env);
966 break;
967 case KVM_EXIT_HLT:
968 r = kvm_arch_halt(env);
969 break;
970 case KVM_EXIT_IRQ_WINDOW_OPEN:
971 break;
972 case KVM_EXIT_SHUTDOWN:
973 r = handle_shutdown(kvm, env);
974 break;
975 #if defined(__s390__)
976 case KVM_EXIT_S390_SIEIC:
977 r = kvm_s390_handle_intercept(kvm, env, run);
978 break;
979 case KVM_EXIT_S390_RESET:
980 r = kvm_s390_handle_reset(kvm, env, run);
981 break;
982 #endif
983 case KVM_EXIT_INTERNAL_ERROR:
984 kvm_handle_internal_error(env, run);
985 r = 1;
986 break;
987 default:
988 if (kvm_arch_run(env)) {
989 fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason);
990 kvm_show_regs(env);
991 abort();
993 break;
996 more:
997 if (!r)
998 goto again;
999 return r;
1002 int kvm_inject_irq(CPUState *env, unsigned irq)
1004 struct kvm_interrupt intr;
1006 intr.irq = irq;
1007 return kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
1010 int kvm_inject_nmi(CPUState *env)
1012 #ifdef KVM_CAP_USER_NMI
1013 return kvm_vcpu_ioctl(env, KVM_NMI);
1014 #else
1015 return -ENOSYS;
1016 #endif
1019 int kvm_init_coalesced_mmio(kvm_context_t kvm)
1021 int r = 0;
1022 kvm_state->coalesced_mmio = 0;
1023 #ifdef KVM_CAP_COALESCED_MMIO
1024 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
1025 if (r > 0) {
1026 kvm_state->coalesced_mmio = r;
1027 return 0;
1029 #endif
1030 return r;
1033 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
1034 int kvm_assign_pci_device(kvm_context_t kvm,
1035 struct kvm_assigned_pci_dev *assigned_dev)
1037 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
1040 static int kvm_old_assign_irq(kvm_context_t kvm,
1041 struct kvm_assigned_irq *assigned_irq)
1043 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
1046 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
1047 int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
1049 int ret;
1051 ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
1052 if (ret > 0) {
1053 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq);
1056 return kvm_old_assign_irq(kvm, assigned_irq);
1059 int kvm_deassign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
1061 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
1063 #else
1064 int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
1066 return kvm_old_assign_irq(kvm, assigned_irq);
1068 #endif
1069 #endif
1071 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1072 int kvm_deassign_pci_device(kvm_context_t kvm,
1073 struct kvm_assigned_pci_dev *assigned_dev)
1075 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
1077 #endif
1079 int kvm_destroy_memory_region_works(kvm_context_t kvm)
1081 int ret = 0;
1083 #ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
1084 ret =
1085 kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
1086 KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
1087 if (ret <= 0)
1088 ret = 0;
1089 #endif
1090 return ret;
1093 int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
1095 #ifdef KVM_CAP_REINJECT_CONTROL
1096 int r;
1097 struct kvm_reinject_control control;
1099 control.pit_reinject = pit_reinject;
1101 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
1102 if (r > 0) {
1103 return kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control);
1105 #endif
1106 return -ENOSYS;
1109 int kvm_has_gsi_routing(kvm_context_t kvm)
1111 int r = 0;
1113 #ifdef KVM_CAP_IRQ_ROUTING
1114 r = kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
1115 #endif
1116 return r;
1119 int kvm_get_gsi_count(kvm_context_t kvm)
1121 #ifdef KVM_CAP_IRQ_ROUTING
1122 return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
1123 #else
1124 return -EINVAL;
1125 #endif
1128 int kvm_clear_gsi_routes(kvm_context_t kvm)
1130 #ifdef KVM_CAP_IRQ_ROUTING
1131 kvm->irq_routes->nr = 0;
1132 return 0;
1133 #else
1134 return -EINVAL;
1135 #endif
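/*
 * Append a GSI routing entry to the userspace routing table, growing the
 * array geometrically (minimum 64 entries) when it is full, and mark the GSI
 * as used in the bitmap.  The table is pushed to the kernel later by
 * kvm_commit_irq_routes().
 */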
1138 int kvm_add_routing_entry(kvm_context_t kvm,
1139 struct kvm_irq_routing_entry *entry)
1141 #ifdef KVM_CAP_IRQ_ROUTING
1142 struct kvm_irq_routing *z;
1143 struct kvm_irq_routing_entry *new;
1144 int n, size;
1146 if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
1147 n = kvm->nr_allocated_irq_routes * 2;
1148 if (n < 64)
1149 n = 64;
1150 size = sizeof(struct kvm_irq_routing);
1151 size += n * sizeof(*new);
1152 z = realloc(kvm->irq_routes, size);
1153 if (!z)
1154 return -ENOMEM;
1155 kvm->nr_allocated_irq_routes = n;
1156 kvm->irq_routes = z;
1158 n = kvm->irq_routes->nr++;
1159 new = &kvm->irq_routes->entries[n];
1160 memset(new, 0, sizeof(*new));
1161 new->gsi = entry->gsi;
1162 new->type = entry->type;
1163 new->flags = entry->flags;
1164 new->u = entry->u;
1166 set_gsi(kvm, entry->gsi);
1168 return 0;
1169 #else
1170 return -ENOSYS;
1171 #endif
1174 int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1176 #ifdef KVM_CAP_IRQ_ROUTING
1177 struct kvm_irq_routing_entry e;
1179 e.gsi = gsi;
1180 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1181 e.flags = 0;
1182 e.u.irqchip.irqchip = irqchip;
1183 e.u.irqchip.pin = pin;
1184 return kvm_add_routing_entry(kvm, &e);
1185 #else
1186 return -ENOSYS;
1187 #endif
1190 int kvm_del_routing_entry(kvm_context_t kvm,
1191 struct kvm_irq_routing_entry *entry)
1193 #ifdef KVM_CAP_IRQ_ROUTING
1194 struct kvm_irq_routing_entry *e, *p;
1195 int i, gsi, found = 0;
1197 gsi = entry->gsi;
1199 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1200 e = &kvm->irq_routes->entries[i];
1201 if (e->type == entry->type && e->gsi == gsi) {
1202 switch (e->type) {
1203 case KVM_IRQ_ROUTING_IRQCHIP:{
1204 if (e->u.irqchip.irqchip ==
1205 entry->u.irqchip.irqchip
1206 && e->u.irqchip.pin == entry->u.irqchip.pin) {
1207 p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
1208 *e = *p;
1209 found = 1;
1211 break;
1213 case KVM_IRQ_ROUTING_MSI:{
1214 if (e->u.msi.address_lo ==
1215 entry->u.msi.address_lo
1216 && e->u.msi.address_hi ==
1217 entry->u.msi.address_hi
1218 && e->u.msi.data == entry->u.msi.data) {
1219 p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
1220 *e = *p;
1221 found = 1;
1223 break;
1225 default:
1226 break;
1228 if (found) {
1229 /* If there are no other users of this GSI
1230 * mark it available in the bitmap */
1231 for (i = 0; i < kvm->irq_routes->nr; i++) {
1232 e = &kvm->irq_routes->entries[i];
1233 if (e->gsi == gsi)
1234 break;
1236 if (i == kvm->irq_routes->nr)
1237 clear_gsi(kvm, gsi);
1239 return 0;
1243 return -ESRCH;
1244 #else
1245 return -ENOSYS;
1246 #endif
1249 int kvm_update_routing_entry(kvm_context_t kvm,
1250 struct kvm_irq_routing_entry *entry,
1251 struct kvm_irq_routing_entry *newentry)
1253 #ifdef KVM_CAP_IRQ_ROUTING
1254 struct kvm_irq_routing_entry *e;
1255 int i;
1257 if (entry->gsi != newentry->gsi || entry->type != newentry->type) {
1258 return -EINVAL;
1261 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1262 e = &kvm->irq_routes->entries[i];
1263 if (e->type != entry->type || e->gsi != entry->gsi) {
1264 continue;
1266 switch (e->type) {
1267 case KVM_IRQ_ROUTING_IRQCHIP:
1268 if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
1269 e->u.irqchip.pin == entry->u.irqchip.pin) {
1270 memcpy(&e->u.irqchip, &newentry->u.irqchip,
1271 sizeof e->u.irqchip);
1272 return 0;
1274 break;
1275 case KVM_IRQ_ROUTING_MSI:
1276 if (e->u.msi.address_lo == entry->u.msi.address_lo &&
1277 e->u.msi.address_hi == entry->u.msi.address_hi &&
1278 e->u.msi.data == entry->u.msi.data) {
1279 memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
1280 return 0;
1282 break;
1283 default:
1284 break;
1287 return -ESRCH;
1288 #else
1289 return -ENOSYS;
1290 #endif
1293 int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1295 #ifdef KVM_CAP_IRQ_ROUTING
1296 struct kvm_irq_routing_entry e;
1298 e.gsi = gsi;
1299 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1300 e.flags = 0;
1301 e.u.irqchip.irqchip = irqchip;
1302 e.u.irqchip.pin = pin;
1303 return kvm_del_routing_entry(kvm, &e);
1304 #else
1305 return -ENOSYS;
1306 #endif
1309 int kvm_commit_irq_routes(kvm_context_t kvm)
1311 #ifdef KVM_CAP_IRQ_ROUTING
1312 kvm->irq_routes->flags = 0;
1313 return kvm_vm_ioctl(kvm_state, KVM_SET_GSI_ROUTING, kvm->irq_routes);
1314 #else
1315 return -ENOSYS;
1316 #endif
1319 int kvm_get_irq_route_gsi(kvm_context_t kvm)
1321 int i, bit;
1322 uint32_t *buf = kvm->used_gsi_bitmap;
1324 /* Return the lowest unused GSI in the bitmap */
1325 for (i = 0; i < kvm->max_gsi / 32; i++) {
1326 bit = ffs(~buf[i]);
1327 if (!bit)
1328 continue;
1330 return bit - 1 + i * 32;
1333 return -ENOSPC;
1336 #ifdef KVM_CAP_DEVICE_MSIX
1337 int kvm_assign_set_msix_nr(kvm_context_t kvm,
1338 struct kvm_assigned_msix_nr *msix_nr)
1340 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
1343 int kvm_assign_set_msix_entry(kvm_context_t kvm,
1344 struct kvm_assigned_msix_entry *entry)
1346 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
1348 #endif
1350 #if defined(KVM_CAP_IRQFD) && defined(CONFIG_EVENTFD)
1352 #include <sys/eventfd.h>
1354 static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
1356 struct kvm_irqfd data = {
1357 .fd = fd,
1358 .gsi = gsi,
1359 .flags = flags,
1362 return kvm_vm_ioctl(kvm_state, KVM_IRQFD, &data);
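/*
 * Allocate an eventfd and bind it to the given GSI with KVM_IRQFD.  The
 * returned fd can then be used to inject the interrupt without an ioctl,
 * e.g. (sketch):
 *
 *     uint64_t one = 1;
 *     write(fd, &one, sizeof(one));   /+ raises the GSI in the guest +/
 */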
1365 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1367 int r;
1368 int fd;
1370 if (!kvm_check_extension(kvm_state, KVM_CAP_IRQFD))
1371 return -ENOENT;
1373 fd = eventfd(0, 0);
1374 if (fd < 0)
1375 return -errno;
1377 r = _kvm_irqfd(kvm, fd, gsi, 0);
1378 if (r < 0) {
1379 close(fd);
1380 return -errno;
1383 return fd;
1386 #else /* KVM_CAP_IRQFD */
1388 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1390 return -ENOSYS;
1393 #endif /* KVM_CAP_IRQFD */
1394 unsigned long kvm_get_thread_id(void)
1396 return syscall(SYS_gettid);
1399 static void qemu_cond_wait(pthread_cond_t *cond)
1401 CPUState *env = cpu_single_env;
1403 pthread_cond_wait(cond, &qemu_mutex);
1404 cpu_single_env = env;
1407 static void sig_ipi_handler(int n)
1411 static void hardware_memory_error(void)
1413 fprintf(stderr, "Hardware memory error!\n");
1414 exit(1);
1417 static void sigbus_reraise(void)
1419 sigset_t set;
1420 struct sigaction action;
1422 memset(&action, 0, sizeof(action));
1423 action.sa_handler = SIG_DFL;
1424 if (!sigaction(SIGBUS, &action, NULL)) {
1425 raise(SIGBUS);
1426 sigemptyset(&set);
1427 sigaddset(&set, SIGBUS);
1428 sigprocmask(SIG_UNBLOCK, &set, NULL);
1430 perror("Failed to re-raise SIGBUS!\n");
1431 abort();
1434 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
1435 void *ctx)
1437 #if defined(KVM_CAP_MCE) && defined(TARGET_I386)
1438 if (first_cpu->mcg_cap && siginfo->ssi_addr
1439 && siginfo->ssi_code == BUS_MCEERR_AO) {
1440 uint64_t status;
1441 unsigned long paddr;
1442 CPUState *cenv;
1444 /* Hope we are lucky for AO MCE */
1445 if (do_qemu_ram_addr_from_host((void *)(intptr_t)siginfo->ssi_addr,
1446 &paddr)) {
1447 fprintf(stderr, "Hardware memory error for memory used by "
1448 "QEMU itself instead of guest system!: %llx\n",
1449 (unsigned long long)siginfo->ssi_addr);
1450 return;
1452 status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
1453 | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
1454 | 0xc0;
1455 kvm_inject_x86_mce(first_cpu, 9, status,
1456 MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr,
1457 (MCM_ADDR_PHYS << 6) | 0xc, 1);
1458 for (cenv = first_cpu->next_cpu; cenv != NULL; cenv = cenv->next_cpu)
1459 kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
1460 MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1);
1461 } else
1462 #endif
1464 if (siginfo->ssi_code == BUS_MCEERR_AO)
1465 return;
1466 else if (siginfo->ssi_code == BUS_MCEERR_AR)
1467 hardware_memory_error();
1468 else
1469 sigbus_reraise();
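/*
 * Run func(data) in the context of the given vcpu thread.  If we already are
 * that thread, call it directly; otherwise queue a work item, kick the vcpu
 * with SIG_IPI and wait on qemu_work_cond until the item is marked done.
 */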
1473 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
1475 struct qemu_work_item wi;
1477 if (env == current_env) {
1478 func(data);
1479 return;
1482 wi.func = func;
1483 wi.data = data;
1484 if (!env->kvm_cpu_state.queued_work_first)
1485 env->kvm_cpu_state.queued_work_first = &wi;
1486 else
1487 env->kvm_cpu_state.queued_work_last->next = &wi;
1488 env->kvm_cpu_state.queued_work_last = &wi;
1489 wi.next = NULL;
1490 wi.done = false;
1492 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1493 while (!wi.done)
1494 qemu_cond_wait(&qemu_work_cond);
1497 static void do_kvm_cpu_synchronize_state(void *_env)
1499 CPUState *env = _env;
1501 if (!env->kvm_vcpu_dirty) {
1502 kvm_arch_save_regs(env);
1503 env->kvm_vcpu_dirty = 1;
1507 void kvm_cpu_synchronize_state(CPUState *env)
1509 if (!env->kvm_vcpu_dirty)
1510 on_vcpu(env, do_kvm_cpu_synchronize_state, env);
1513 void kvm_cpu_synchronize_post_reset(CPUState *env)
1515 kvm_arch_load_regs(env, KVM_PUT_RESET_STATE);
1516 env->kvm_vcpu_dirty = 0;
1519 void kvm_cpu_synchronize_post_init(CPUState *env)
1521 kvm_arch_load_regs(env, KVM_PUT_FULL_STATE);
1522 env->kvm_vcpu_dirty = 0;
1525 static void inject_interrupt(void *data)
1527 cpu_interrupt(current_env, (long) data);
1530 void kvm_inject_interrupt(CPUState *env, int mask)
1532 on_vcpu(env, inject_interrupt, (void *) (long) mask);
1535 void kvm_update_interrupt_request(CPUState *env)
1537 int signal = 0;
1539 if (env) {
1540 if (!current_env || !current_env->created)
1541 signal = 1;
1543 * Testing for created here is really redundant
1545 if (current_env && current_env->created &&
1546 env != current_env && !env->kvm_cpu_state.signalled)
1547 signal = 1;
1549 if (signal) {
1550 env->kvm_cpu_state.signalled = 1;
1551 if (env->kvm_cpu_state.thread)
1552 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1557 int kvm_cpu_exec(CPUState *env)
1559 int r;
1561 r = kvm_run(env);
1562 if (r < 0) {
1563 printf("kvm_run returned %d\n", r);
1564 vm_stop(0);
1567 return 0;
1570 int kvm_cpu_is_stopped(CPUState *env)
1572 return !vm_running || env->stopped;
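/*
 * Drain this vcpu's queued work items (posted by on_vcpu() from other
 * threads), then wake any waiters on qemu_work_cond.
 */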
1575 static void flush_queued_work(CPUState *env)
1577 struct qemu_work_item *wi;
1579 if (!env->kvm_cpu_state.queued_work_first)
1580 return;
1582 while ((wi = env->kvm_cpu_state.queued_work_first)) {
1583 env->kvm_cpu_state.queued_work_first = wi->next;
1584 wi->func(wi->data);
1585 wi->done = true;
1587 env->kvm_cpu_state.queued_work_last = NULL;
1588 pthread_cond_broadcast(&qemu_work_cond);
1591 static int kvm_mce_in_exception(CPUState *env)
1593 struct kvm_msr_entry msr_mcg_status = {
1594 .index = MSR_MCG_STATUS,
1596 int r;
1598 r = kvm_get_msrs(env, &msr_mcg_status, 1);
1599 if (r == -1 || r == 0)
1600 return -1;
1601 return !!(msr_mcg_status.data & MCG_STATUS_MCIP);
1604 static void kvm_on_sigbus(CPUState *env, siginfo_t *siginfo)
1606 #if defined(KVM_CAP_MCE) && defined(TARGET_I386)
1607 struct kvm_x86_mce mce = {
1608 .bank = 9,
1610 unsigned long paddr;
1611 int r;
1613 if (env->mcg_cap && siginfo->si_addr
1614 && (siginfo->si_code == BUS_MCEERR_AR
1615 || siginfo->si_code == BUS_MCEERR_AO)) {
1616 if (siginfo->si_code == BUS_MCEERR_AR) {
1617 /* Fake an Intel architectural Data Load SRAR UCR */
1618 mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
1619 | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
1620 | MCI_STATUS_AR | 0x134;
1621 mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
1622 mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
1623 } else {
1625 * If there is an MCE exception being processed, ignore
1626 * this SRAO MCE
1628 r = kvm_mce_in_exception(env);
1629 if (r == -1)
1630 fprintf(stderr, "Failed to get MCE status\n");
1631 else if (r)
1632 return;
1633 /* Fake an Intel architectural Memory scrubbing UCR */
1634 mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
1635 | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
1636 | 0xc0;
1637 mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
1638 mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
1640 if (do_qemu_ram_addr_from_host((void *)siginfo->si_addr, &paddr)) {
1641 fprintf(stderr, "Hardware memory error for memory used by "
1642 "QEMU itself instaed of guest system!\n");
1643 /* Hope we are lucky for AO MCE */
1644 if (siginfo->si_code == BUS_MCEERR_AO)
1645 return;
1646 else
1647 hardware_memory_error();
1649 mce.addr = paddr;
1650 r = kvm_set_mce(env, &mce);
1651 if (r < 0) {
1652 fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
1653 abort();
1655 } else
1656 #endif
1658 if (siginfo->si_code == BUS_MCEERR_AO)
1659 return;
1660 else if (siginfo->si_code == BUS_MCEERR_AR)
1661 hardware_memory_error();
1662 else
1663 sigbus_reraise();
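/*
 * Per-vcpu idle/housekeeping path: drop qemu_mutex and wait up to 'timeout'
 * ms for SIG_IPI or SIGBUS with sigtimedwait(), handle machine-check SIGBUS,
 * then reacquire the lock, flush queued work and honour stop requests.
 */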
1667 static void kvm_main_loop_wait(CPUState *env, int timeout)
1669 struct timespec ts;
1670 int r, e;
1671 siginfo_t siginfo;
1672 sigset_t waitset;
1673 sigset_t chkset;
1675 ts.tv_sec = timeout / 1000;
1676 ts.tv_nsec = (timeout % 1000) * 1000000;
1677 sigemptyset(&waitset);
1678 sigaddset(&waitset, SIG_IPI);
1679 sigaddset(&waitset, SIGBUS);
1681 do {
1682 pthread_mutex_unlock(&qemu_mutex);
1684 r = sigtimedwait(&waitset, &siginfo, &ts);
1685 e = errno;
1687 pthread_mutex_lock(&qemu_mutex);
1689 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
1690 printf("sigtimedwait: %s\n", strerror(e));
1691 exit(1);
1694 switch (r) {
1695 case SIGBUS:
1696 kvm_on_sigbus(env, &siginfo);
1697 break;
1698 default:
1699 break;
1702 r = sigpending(&chkset);
1703 if (r == -1) {
1704 printf("sigpending: %s\n", strerror(e));
1705 exit(1);
1707 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1709 cpu_single_env = env;
1710 flush_queued_work(env);
1712 if (env->stop) {
1713 env->stop = 0;
1714 env->stopped = 1;
1715 pthread_cond_signal(&qemu_pause_cond);
1718 env->kvm_cpu_state.signalled = 0;
1721 static int all_threads_paused(void)
1723 CPUState *penv = first_cpu;
1725 while (penv) {
1726 if (penv->stop)
1727 return 0;
1728 penv = (CPUState *) penv->next_cpu;
1731 return 1;
1734 static void pause_all_threads(void)
1736 CPUState *penv = first_cpu;
1738 while (penv) {
1739 if (penv != cpu_single_env) {
1740 penv->stop = 1;
1741 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1742 } else {
1743 penv->stop = 0;
1744 penv->stopped = 1;
1745 cpu_exit(penv);
1747 penv = (CPUState *) penv->next_cpu;
1750 while (!all_threads_paused())
1751 qemu_cond_wait(&qemu_pause_cond);
1754 static void resume_all_threads(void)
1756 CPUState *penv = first_cpu;
1758 assert(!cpu_single_env);
1760 while (penv) {
1761 penv->stop = 0;
1762 penv->stopped = 0;
1763 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1764 penv = (CPUState *) penv->next_cpu;
1768 static void kvm_vm_state_change_handler(void *context, int running, int reason)
1770 if (running)
1771 resume_all_threads();
1772 else
1773 pause_all_threads();
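/*
 * Block SIGUSR2/SIGIO/SIGALRM in the vcpu thread and hand KVM a signal mask
 * with SIG_IPI and SIGBUS unblocked, so those signals interrupt KVM_RUN and
 * are delivered synchronously to this thread.
 */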
1776 static void setup_kernel_sigmask(CPUState *env)
1778 sigset_t set;
1780 sigemptyset(&set);
1781 sigaddset(&set, SIGUSR2);
1782 sigaddset(&set, SIGIO);
1783 sigaddset(&set, SIGALRM);
1784 sigprocmask(SIG_BLOCK, &set, NULL);
1786 sigprocmask(SIG_BLOCK, NULL, &set);
1787 sigdelset(&set, SIG_IPI);
1788 sigdelset(&set, SIGBUS);
1790 kvm_set_signal_mask(env, &set);
1793 static void qemu_kvm_system_reset(void)
1795 pause_all_threads();
1797 qemu_system_reset();
1799 resume_all_threads();
1802 static void process_irqchip_events(CPUState *env)
1804 kvm_arch_process_irqchip_events(env);
1805 if (kvm_arch_has_work(env))
1806 env->halted = 0;
1809 static int kvm_main_loop_cpu(CPUState *env)
1811 while (1) {
1812 int run_cpu = !kvm_cpu_is_stopped(env);
1813 if (run_cpu && !kvm_irqchip_in_kernel()) {
1814 process_irqchip_events(env);
1815 run_cpu = !env->halted;
1817 if (run_cpu) {
1818 kvm_cpu_exec(env);
1819 kvm_main_loop_wait(env, 0);
1820 } else {
1821 kvm_main_loop_wait(env, 1000);
1824 pthread_mutex_unlock(&qemu_mutex);
1825 return 0;
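/*
 * Entry point of each vcpu thread: block all signals, create the vcpu and
 * install its kernel signal mask, announce creation on qemu_vcpu_cond, wait
 * for machine init to finish, then enter kvm_main_loop_cpu().
 */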
1828 static void *ap_main_loop(void *_env)
1830 CPUState *env = _env;
1831 sigset_t signals;
1832 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1833 struct ioperm_data *data = NULL;
1834 #endif
1836 current_env = env;
1837 env->thread_id = kvm_get_thread_id();
1838 sigfillset(&signals);
1839 sigprocmask(SIG_BLOCK, &signals, NULL);
1841 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1842 /* do ioperm for io ports of assigned devices */
1843 QLIST_FOREACH(data, &ioperm_head, entries)
1844 on_vcpu(env, kvm_arch_do_ioperm, data);
1845 #endif
1847 pthread_mutex_lock(&qemu_mutex);
1848 cpu_single_env = env;
1850 kvm_create_vcpu(env, env->cpu_index);
1851 setup_kernel_sigmask(env);
1853 /* signal VCPU creation */
1854 current_env->created = 1;
1855 pthread_cond_signal(&qemu_vcpu_cond);
1857 /* and wait for machine initialization */
1858 while (!qemu_system_ready)
1859 qemu_cond_wait(&qemu_system_cond);
1861 /* re-initialize cpu_single_env after re-acquiring qemu_mutex */
1862 cpu_single_env = env;
1864 kvm_main_loop_cpu(env);
1865 return NULL;
1868 int kvm_init_vcpu(CPUState *env)
1870 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
1872 while (env->created == 0)
1873 qemu_cond_wait(&qemu_vcpu_cond);
1875 return 0;
1878 int kvm_vcpu_inited(CPUState *env)
1880 return env->created;
1883 #ifdef TARGET_I386
1884 void kvm_hpet_disable_kpit(void)
1886 struct kvm_pit_state2 ps2;
1888 kvm_get_pit2(kvm_context, &ps2);
1889 ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
1890 kvm_set_pit2(kvm_context, &ps2);
1893 void kvm_hpet_enable_kpit(void)
1895 struct kvm_pit_state2 ps2;
1897 kvm_get_pit2(kvm_context, &ps2);
1898 ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY;
1899 kvm_set_pit2(kvm_context, &ps2);
1901 #endif
1903 int kvm_init_ap(void)
1905 struct sigaction action;
1907 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
1909 signal(SIG_IPI, sig_ipi_handler);
1911 memset(&action, 0, sizeof(action));
1912 action.sa_flags = SA_SIGINFO;
1913 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
1914 sigaction(SIGBUS, &action, NULL);
1915 prctl(PR_MCE_KILL, 1, 1, 0, 0);
1916 return 0;
1919 void qemu_kvm_notify_work(void)
1921 /* Write 8 bytes to be compatible with eventfd. */
1922 static uint64_t val = 1;
1923 ssize_t ret;
1925 if (io_thread_fd == -1)
1926 return;
1928 do {
1929 ret = write(io_thread_fd, &val, sizeof(val));
1930 } while (ret < 0 && errno == EINTR);
1932 /* EAGAIN is fine in case we have a pipe. */
1933 if (ret < 0 && errno != EAGAIN) {
1934 fprintf(stderr, "qemu_kvm_notify_work: write() failed: %s\n",
1935 strerror(errno));
1936 exit (1);
1940 /* If we have signalfd, we mask out the signals we want to handle and then
1941 * use signalfd to listen for them. We rely on whatever the current signal
1942 * handler is to dispatch the signals when we receive them.
1945 static void sigfd_handler(void *opaque)
1947 int fd = (unsigned long) opaque;
1948 struct qemu_signalfd_siginfo info;
1949 struct sigaction action;
1950 ssize_t len;
1952 while (1) {
1953 do {
1954 len = read(fd, &info, sizeof(info));
1955 } while (len == -1 && errno == EINTR);
1957 if (len == -1 && errno == EAGAIN)
1958 break;
1960 if (len != sizeof(info)) {
1961 printf("read from sigfd returned %zd: %m\n", len);
1962 return;
1965 sigaction(info.ssi_signo, NULL, &action);
1966 if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction)
1967 action.sa_sigaction(info.ssi_signo,
1968 (siginfo_t *)&info, NULL);
1969 else if (action.sa_handler)
1970 action.sa_handler(info.ssi_signo);
1975 /* Used to break IO thread out of select */
1976 static void io_thread_wakeup(void *opaque)
1978 int fd = (unsigned long) opaque;
1979 ssize_t len;
1980 char buffer[512];
1982 /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */
1983 do {
1984 len = read(fd, buffer, sizeof(buffer));
1985 } while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
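/*
 * Main loop of the io thread: wait for events, then service shutdown,
 * powerdown, reset and guest-debug requests.  Exit requests are checked on
 * every iteration, independently of the other events, so a pending exit is
 * acted upon promptly.
 */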
1988 int kvm_main_loop(void)
1990 int fds[2];
1991 sigset_t mask;
1992 int sigfd;
1994 io_thread = pthread_self();
1995 qemu_system_ready = 1;
1997 if (qemu_eventfd(fds) == -1) {
1998 fprintf(stderr, "failed to create eventfd\n");
1999 return -errno;
2002 fcntl(fds[0], F_SETFL, O_NONBLOCK);
2003 fcntl(fds[1], F_SETFL, O_NONBLOCK);
2005 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
2006 (void *)(unsigned long) fds[0]);
2008 io_thread_fd = fds[1];
2010 sigemptyset(&mask);
2011 sigaddset(&mask, SIGIO);
2012 sigaddset(&mask, SIGALRM);
2013 sigaddset(&mask, SIGBUS);
2014 sigprocmask(SIG_BLOCK, &mask, NULL);
2016 sigfd = qemu_signalfd(&mask);
2017 if (sigfd == -1) {
2018 fprintf(stderr, "failed to create signalfd\n");
2019 return -errno;
2022 fcntl(sigfd, F_SETFL, O_NONBLOCK);
2024 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
2025 (void *)(unsigned long) sigfd);
2027 pthread_cond_broadcast(&qemu_system_cond);
2029 io_thread_sigfd = sigfd;
2030 cpu_single_env = NULL;
2032 while (1) {
2033 main_loop_wait(0);
2034 if (qemu_shutdown_requested()) {
2035 monitor_protocol_event(QEVENT_SHUTDOWN, NULL);
2036 if (qemu_no_shutdown()) {
2037 vm_stop(0);
2038 } else
2039 break;
2040 } else if (qemu_powerdown_requested()) {
2041 monitor_protocol_event(QEVENT_POWERDOWN, NULL);
2042 qemu_irq_raise(qemu_system_powerdown);
2043 } else if (qemu_reset_requested()) {
2044 qemu_kvm_system_reset();
2045 } else if (kvm_debug_cpu_requested) {
2046 gdb_set_stop_cpu(kvm_debug_cpu_requested);
2047 vm_stop(EXCP_DEBUG);
2048 kvm_debug_cpu_requested = NULL;
2050 if (qemu_exit_requested()) {
2051 exit(0);
2055 pause_all_threads();
2056 pthread_mutex_unlock(&qemu_mutex);
2058 return 0;
2061 #ifdef TARGET_I386
2062 static int destroy_region_works = 0;
2063 #endif
2066 #if !defined(TARGET_I386)
2067 int kvm_arch_init_irq_routing(void)
2069 return 0;
2071 #endif
2073 extern int no_hpet;
2075 static int kvm_create_context(void)
2077 int r;
2079 if (!kvm_irqchip) {
2080 kvm_disable_irqchip_creation(kvm_context);
2082 if (!kvm_pit) {
2083 kvm_disable_pit_creation(kvm_context);
2085 if (kvm_create(kvm_context, 0, NULL) < 0) {
2086 kvm_finalize(kvm_state);
2087 return -1;
2089 r = kvm_arch_qemu_create_context();
2090 if (r < 0) {
2091 kvm_finalize(kvm_state);
2092 return -1;
2094 if (kvm_pit && !kvm_pit_reinject) {
2095 if (kvm_reinject_control(kvm_context, 0)) {
2096 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
2097 return -1;
2100 #ifdef TARGET_I386
2101 destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
2102 #endif
2104 r = kvm_arch_init_irq_routing();
2105 if (r < 0) {
2106 return r;
2109 kvm_state->vcpu_events = 0;
2110 #ifdef KVM_CAP_VCPU_EVENTS
2111 kvm_state->vcpu_events = kvm_check_extension(kvm_state, KVM_CAP_VCPU_EVENTS);
2112 #endif
2114 kvm_state->debugregs = 0;
2115 #ifdef KVM_CAP_DEBUGREGS
2116 kvm_state->debugregs = kvm_check_extension(kvm_state, KVM_CAP_DEBUGREGS);
2117 #endif
2119 kvm_init_ap();
2120 if (kvm_irqchip) {
2121 if (!qemu_kvm_has_gsi_routing()) {
2122 irq0override = 0;
2123 #ifdef TARGET_I386
2124 /* if kernel can't do irq routing, interrupt source
2125 * override 0->2 can not be set up as required by hpet,
2126 * so disable hpet.
2128 no_hpet = 1;
2129 } else if (!qemu_kvm_has_pit_state2()) {
2130 no_hpet = 1;
2132 #else
2134 #endif
2137 return 0;
2140 #ifdef TARGET_I386
2141 static int must_use_aliases_source(target_phys_addr_t addr)
2143 if (destroy_region_works)
2144 return false;
2145 if (addr == 0xa0000 || addr == 0xa8000)
2146 return true;
2147 return false;
2150 static int must_use_aliases_target(target_phys_addr_t addr)
2152 if (destroy_region_works)
2153 return false;
2154 if (addr >= 0xe0000000 && addr < 0x100000000ull)
2155 return true;
2156 return false;
2159 static struct mapping {
2160 target_phys_addr_t phys;
2161 ram_addr_t ram;
2162 ram_addr_t len;
2163 } mappings[50];
2164 static int nr_mappings;
2166 static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
2168 struct mapping *p;
2170 for (p = mappings; p < mappings + nr_mappings; ++p) {
2171 if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
2172 return p;
2175 return NULL;
2178 static struct mapping *find_mapping(target_phys_addr_t start_addr)
2180 struct mapping *p;
2182 for (p = mappings; p < mappings + nr_mappings; ++p) {
2183 if (p->phys <= start_addr && start_addr < p->phys + p->len) {
2184 return p;
2187 return NULL;
2190 static void drop_mapping(target_phys_addr_t start_addr)
2192 struct mapping *p = find_mapping(start_addr);
2194 if (p)
2195 *p = mappings[--nr_mappings];
2197 #endif
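/*
 * qemu -> kvm memory slot plumbing.  RAM regions are registered as user
 * memory slots; non-RAM (ROM/MMIO) ranges covering existing slots are torn
 * down page by page.  On i386, regions that cannot be destroyed safely on
 * old kernels are handled via memory aliases instead.
 */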
2199 void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
2200 ram_addr_t phys_offset)
2202 int r = 0;
2203 unsigned long area_flags;
2204 #ifdef TARGET_I386
2205 struct mapping *p;
2206 #endif
2208 if (start_addr + size > phys_ram_size) {
2209 phys_ram_size = start_addr + size;
2212 phys_offset &= ~IO_MEM_ROM;
2213 area_flags = phys_offset & ~TARGET_PAGE_MASK;
2215 if (area_flags != IO_MEM_RAM) {
2216 #ifdef TARGET_I386
2217 if (must_use_aliases_source(start_addr)) {
2218 kvm_destroy_memory_alias(kvm_context, start_addr);
2219 return;
2221 if (must_use_aliases_target(start_addr))
2222 return;
2223 #endif
2224 while (size > 0) {
2225 p = find_mapping(start_addr);
2226 if (p) {
2227 kvm_unregister_memory_area(kvm_context, p->phys, p->len);
2228 drop_mapping(p->phys);
2230 start_addr += TARGET_PAGE_SIZE;
2231 if (size > TARGET_PAGE_SIZE) {
2232 size -= TARGET_PAGE_SIZE;
2233 } else {
2234 size = 0;
2237 return;
2240 r = kvm_is_containing_region(kvm_context, start_addr, size);
2241 if (r)
2242 return;
2244 if (area_flags >= TLB_MMIO)
2245 return;
2247 #ifdef TARGET_I386
2248 if (must_use_aliases_source(start_addr)) {
2249 p = find_ram_mapping(phys_offset);
2250 if (p) {
2251 kvm_create_memory_alias(kvm_context, start_addr, size,
2252 p->phys + (phys_offset - p->ram));
2254 return;
2256 #endif
2258 r = kvm_register_phys_mem(kvm_context, start_addr,
2259 qemu_get_ram_ptr(phys_offset), size, 0);
2260 if (r < 0) {
2261 printf("kvm_cpu_register_physical_memory: failed\n");
2262 exit(1);
2264 #ifdef TARGET_I386
2265 drop_mapping(start_addr);
2266 p = &mappings[nr_mappings++];
2267 p->phys = start_addr;
2268 p->ram = phys_offset;
2269 p->len = size;
2270 #endif
2272 return;
2276 * dirty pages logging
2278 /* FIXME: use unsigned long pointer instead of unsigned char */
2279 unsigned char *kvm_dirty_bitmap = NULL;
2280 int kvm_physical_memory_set_dirty_tracking(int enable)
2282 int r = 0;
2284 if (!kvm_enabled())
2285 return 0;
2287 if (enable) {
2288 if (!kvm_dirty_bitmap) {
2289 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
2290 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
2291 r = kvm_dirty_pages_log_enable_all(kvm_context);
2293 } else {
2294 if (kvm_dirty_bitmap) {
2295 r = kvm_dirty_pages_log_reset(kvm_context);
2296 qemu_free(kvm_dirty_bitmap);
2297 kvm_dirty_bitmap = NULL;
2300 return r;
2303 /* get kvm's dirty pages bitmap and update qemu's */
2304 static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
2305 unsigned long *bitmap,
2306 unsigned long offset,
2307 unsigned long mem_size)
2309 unsigned int i, j;
2310 unsigned long page_number, addr, addr1, c;
2311 ram_addr_t ram_addr;
2312 unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + HOST_LONG_BITS - 1) /
2313 HOST_LONG_BITS;
2316 * bitmap-traveling is faster than memory-traveling (for addr...)
2317 * especially when most of the memory is not dirty.
2319 for (i = 0; i < len; i++) {
2320 if (bitmap[i] != 0) {
2321 c = leul_to_cpu(bitmap[i]);
2322 do {
2323 j = ffsl(c) - 1;
2324 c &= ~(1ul << j);
2325 page_number = i * HOST_LONG_BITS + j;
2326 addr1 = page_number * TARGET_PAGE_SIZE;
2327 addr = offset + addr1;
2328 ram_addr = cpu_get_physical_page_desc(addr);
2329 cpu_physical_memory_set_dirty(ram_addr);
2330 } while (c != 0);
2333 return 0;
2336 static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
2337 void *bitmap, void *opaque)
2339 return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
2342 void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
2343 int log)
2345 if (log)
2346 kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
2347 else {
2348 #ifdef TARGET_I386
2349 if (must_use_aliases_target(start))
2350 return;
2351 #endif
2352 kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
2356 #ifdef KVM_CAP_IRQCHIP
2358 int kvm_set_irq(int irq, int level, int *status)
2360 return kvm_set_irq_level(kvm_context, irq, level, status);
2363 #endif
2365 void kvm_mutex_unlock(void)
2367 assert(!cpu_single_env);
2368 pthread_mutex_unlock(&qemu_mutex);
2371 void kvm_mutex_lock(void)
2373 pthread_mutex_lock(&qemu_mutex);
2374 cpu_single_env = NULL;
2377 void qemu_mutex_unlock_iothread(void)
2379 if (kvm_enabled())
2380 kvm_mutex_unlock();
2383 void qemu_mutex_lock_iothread(void)
2385 if (kvm_enabled())
2386 kvm_mutex_lock();
2389 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2390 void kvm_add_ioperm_data(struct ioperm_data *data)
2392 QLIST_INSERT_HEAD(&ioperm_head, data, entries);
2395 void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
2397 struct ioperm_data *data;
2399 data = QLIST_FIRST(&ioperm_head);
2400 while (data) {
2401 struct ioperm_data *next = QLIST_NEXT(data, entries);
2403 if (data->start_port == start_port && data->num == num) {
2404 QLIST_REMOVE(data, entries);
2405 qemu_free(data);
2408 data = next;
2412 void kvm_ioperm(CPUState *env, void *data)
2414 if (kvm_enabled() && qemu_system_ready)
2415 on_vcpu(env, kvm_arch_do_ioperm, data);
2418 #endif
2420 int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2421 target_phys_addr_t end_addr)
2423 #ifndef TARGET_IA64
2425 #ifdef TARGET_I386
2426 if (must_use_aliases_source(start_addr))
2427 return 0;
2428 #endif
2430 kvm_get_dirty_pages_range(kvm_context, start_addr,
2431 end_addr - start_addr, NULL,
2432 kvm_get_dirty_bitmap_cb);
2433 #endif
2434 return 0;
2437 int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t len)
2439 #ifdef TARGET_I386
2440 if (must_use_aliases_source(phys_addr))
2441 return 0;
2442 #endif
2444 #ifndef TARGET_IA64
2445 kvm_qemu_log_memory(phys_addr, len, 1);
2446 #endif
2447 return 0;
2450 int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t len)
2452 #ifdef TARGET_I386
2453 if (must_use_aliases_source(phys_addr))
2454 return 0;
2455 #endif
2457 #ifndef TARGET_IA64
2458 kvm_qemu_log_memory(phys_addr, len, 0);
2459 #endif
2460 return 0;
2463 int kvm_set_boot_cpu_id(uint32_t id)
2465 return kvm_set_boot_vcpu_id(kvm_context, id);
2468 #ifdef TARGET_I386
2469 #ifdef KVM_CAP_MCE
2470 struct kvm_x86_mce_data {
2471 CPUState *env;
2472 struct kvm_x86_mce *mce;
2473 int abort_on_error;
2476 static void kvm_do_inject_x86_mce(void *_data)
2478 struct kvm_x86_mce_data *data = _data;
2479 int r;
2481 /* If there is an MCE exception being processed, ignore this SRAO MCE */
2482 r = kvm_mce_in_exception(data->env);
2483 if (r == -1)
2484 fprintf(stderr, "Failed to get MCE status\n");
2485 else if (r && !(data->mce->status & MCI_STATUS_AR))
2486 return;
2487 r = kvm_set_mce(data->env, data->mce);
2488 if (r < 0) {
2489 perror("kvm_set_mce FAILED");
2490 if (data->abort_on_error)
2491 abort();
2494 #endif
2496 void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
2497 uint64_t mcg_status, uint64_t addr, uint64_t misc,
2498 int abort_on_error)
2500 #ifdef KVM_CAP_MCE
2501 struct kvm_x86_mce mce = {
2502 .bank = bank,
2503 .status = status,
2504 .mcg_status = mcg_status,
2505 .addr = addr,
2506 .misc = misc,
2508 struct kvm_x86_mce_data data = {
2509 .env = cenv,
2510 .mce = &mce,
2511 .abort_on_error = abort_on_error,
2514 if (!cenv->mcg_cap) {
2515 fprintf(stderr, "MCE support is not enabled!\n");
2516 return;
2518 on_vcpu(cenv, kvm_do_inject_x86_mce, &data);
2519 #else
2520 if (abort_on_error)
2521 abort();
2522 #endif
2524 #endif