Move kvm cpu state flush to after kvm_arch_pre_run()
[qemu/qemu-dev-zwu.git] / qemu-kvm.c
blob 44e8b75bdd4785aff39ebaf8a9849f61435576c4
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
21 #include "qemu-kvm.h"
22 #include "libkvm.h"
24 #include <pthread.h>
25 #include <sys/utsname.h>
26 #include <sys/syscall.h>
27 #include <sys/mman.h>
28 #include <sys/ioctl.h>
29 #include "compatfd.h"
30 #include <sys/prctl.h>
32 #define false 0
33 #define true 1
35 #ifndef PR_MCE_KILL
36 #define PR_MCE_KILL 33
37 #endif
39 #ifndef BUS_MCEERR_AR
40 #define BUS_MCEERR_AR 4
41 #endif
42 #ifndef BUS_MCEERR_AO
43 #define BUS_MCEERR_AO 5
44 #endif
46 #define EXPECTED_KVM_API_VERSION 12
48 #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
49 #error libkvm: userspace and kernel version mismatch
50 #endif
52 int kvm_allowed = 1;
53 int kvm_irqchip = 1;
54 int kvm_pit = 1;
55 int kvm_pit_reinject = 1;
56 int kvm_nested = 0;
59 KVMState *kvm_state;
60 kvm_context_t kvm_context;
62 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
63 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
64 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
65 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
66 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
67 __thread CPUState *current_env;
69 static int qemu_system_ready;
71 #define SIG_IPI (SIGRTMIN+4)
73 pthread_t io_thread;
74 static int io_thread_fd = -1;
75 static int io_thread_sigfd = -1;
77 static CPUState *kvm_debug_cpu_requested;
79 static uint64_t phys_ram_size;
81 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
82 /* The list of ioperm_data */
83 static QLIST_HEAD(, ioperm_data) ioperm_head;
84 #endif
86 //#define DEBUG_MEMREG
87 #ifdef DEBUG_MEMREG
88 #define DPRINTF(fmt, args...) \
89 do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0)
90 #else
91 #define DPRINTF(fmt, args...) do {} while (0)
92 #endif
94 #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
96 int kvm_abi = EXPECTED_KVM_API_VERSION;
97 int kvm_page_size;
99 #ifdef KVM_CAP_SET_GUEST_DEBUG
100 static int kvm_debug(CPUState *env,
101 struct kvm_debug_exit_arch *arch_info)
103 int handle = kvm_arch_debug(arch_info);
105 if (handle) {
106 kvm_debug_cpu_requested = env;
107 env->stopped = 1;
109 return handle;
111 #endif
113 static int handle_unhandled(uint64_t reason)
115 fprintf(stderr, "kvm: unhandled exit %" PRIx64 "\n", reason);
116 return -EINVAL;
120 static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
122 uint32_t *bitmap = kvm->used_gsi_bitmap;
124 if (gsi < kvm->max_gsi)
125 bitmap[gsi / 32] |= 1U << (gsi % 32);
126 else
127 DPRINTF("Invalid GSI %u\n", gsi);
130 static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
132 uint32_t *bitmap = kvm->used_gsi_bitmap;
134 if (gsi < kvm->max_gsi)
135 bitmap[gsi / 32] &= ~(1U << (gsi % 32));
136 else
137 DPRINTF("Invalid GSI %u\n", gsi);
140 struct slot_info {
141 unsigned long phys_addr;
142 unsigned long len;
143 unsigned long userspace_addr;
144 unsigned flags;
145 int logging_count;
148 struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
150 static void init_slots(void)
152 int i;
154 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
155 slots[i].len = 0;
158 static int get_free_slot(kvm_context_t kvm)
160 int i;
161 int tss_ext;
163 #if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
164 tss_ext = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
165 #else
166 tss_ext = 0;
167 #endif
170 * on older kernels where the set tss ioctl is not supported we must save
171 * slot 0 to hold the extended memory, as the vmx will use the last 3
172 * pages of this slot.
174 if (tss_ext > 0)
175 i = 0;
176 else
177 i = 1;
179 for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
180 if (!slots[i].len)
181 return i;
182 return -1;
185 static void register_slot(int slot, unsigned long phys_addr,
186 unsigned long len, unsigned long userspace_addr,
187 unsigned flags)
189 slots[slot].phys_addr = phys_addr;
190 slots[slot].len = len;
191 slots[slot].userspace_addr = userspace_addr;
192 slots[slot].flags = flags;
195 static void free_slot(int slot)
197 slots[slot].len = 0;
198 slots[slot].logging_count = 0;
201 static int get_slot(unsigned long phys_addr)
203 int i;
205 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
206 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
207 (slots[i].phys_addr + slots[i].len - 1) >= phys_addr)
208 return i;
210 return -1;
213 /* Returns -1 if this slot is not totally contained in any other,
214 * and the number of the slot otherwise */
215 static int get_container_slot(uint64_t phys_addr, unsigned long size)
217 int i;
219 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
220 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
221 (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
222 return i;
223 return -1;
226 int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr,
227 unsigned long size)
229 int slot = get_container_slot(phys_addr, size);
230 if (slot == -1)
231 return 0;
232 return 1;
236 * dirty pages logging control
238 static int kvm_dirty_pages_log_change(kvm_context_t kvm,
239 unsigned long phys_addr, unsigned flags,
240 unsigned mask)
242 int r = -1;
243 int slot = get_slot(phys_addr);
245 if (slot == -1) {
246 fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
247 return 1;
250 flags = (slots[slot].flags & ~mask) | flags;
251 if (flags == slots[slot].flags)
252 return 0;
253 slots[slot].flags = flags;
256 struct kvm_userspace_memory_region mem = {
257 .slot = slot,
258 .memory_size = slots[slot].len,
259 .guest_phys_addr = slots[slot].phys_addr,
260 .userspace_addr = slots[slot].userspace_addr,
261 .flags = slots[slot].flags,
265 DPRINTF("slot %d start %llx len %llx flags %x\n",
266 mem.slot, mem.guest_phys_addr, mem.memory_size, mem.flags);
267 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &mem);
268 if (r < 0)
269 fprintf(stderr, "%s: %m\n", __FUNCTION__);
271 return r;
274 static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
275 int (*change)(kvm_context_t kvm,
276 uint64_t start,
277 uint64_t len))
279 int i, r;
281 for (i = r = 0; i < KVM_MAX_NUM_MEM_REGIONS && r == 0; i++) {
282 if (slots[i].len)
283 r = change(kvm, slots[i].phys_addr, slots[i].len);
285 return r;
288 int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm, uint64_t phys_addr,
289 uint64_t len)
291 int slot = get_slot(phys_addr);
293 DPRINTF("start %" PRIx64 " len %" PRIx64 "\n", phys_addr, len);
294 if (slot == -1) {
295 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
296 return -EINVAL;
299 if (slots[slot].logging_count++)
300 return 0;
302 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
303 KVM_MEM_LOG_DIRTY_PAGES,
304 KVM_MEM_LOG_DIRTY_PAGES);
307 int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm, uint64_t phys_addr,
308 uint64_t len)
310 int slot = get_slot(phys_addr);
312 if (slot == -1) {
313 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
314 return -EINVAL;
317 if (--slots[slot].logging_count)
318 return 0;
320 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr, 0,
321 KVM_MEM_LOG_DIRTY_PAGES);
325 * Enable dirty page logging for all memory regions
327 int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
329 if (kvm->dirty_pages_log_all)
330 return 0;
331 kvm->dirty_pages_log_all = 1;
332 return kvm_dirty_pages_log_change_all(kvm, kvm_dirty_pages_log_enable_slot);
336 * Enable dirty page logging only for memory regions that were created with
337 * dirty logging enabled (disable for all other memory regions).
339 int kvm_dirty_pages_log_reset(kvm_context_t kvm)
341 if (!kvm->dirty_pages_log_all)
342 return 0;
343 kvm->dirty_pages_log_all = 0;
344 return kvm_dirty_pages_log_change_all(kvm,
345 kvm_dirty_pages_log_disable_slot);
349 static int kvm_create_context(void);
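/*
 * kvm_init(): open /dev/kvm, check that the kernel's KVM_GET_API_VERSION
 * matches what this userspace expects, allocate kvm_state and the GSI
 * tracking bitmap, then take qemu_mutex and call kvm_create_context().
 */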
351 int kvm_init(int smp_cpus)
353 int fd;
354 int r, gsi_count;
357 fd = open("/dev/kvm", O_RDWR);
358 if (fd == -1) {
359 perror("open /dev/kvm");
360 return -1;
362 r = ioctl(fd, KVM_GET_API_VERSION, 0);
363 if (r == -1) {
364 fprintf(stderr,
365 "kvm kernel version too old: "
366 "KVM_GET_API_VERSION ioctl not supported\n");
367 goto out_close;
369 if (r < EXPECTED_KVM_API_VERSION) {
370 fprintf(stderr, "kvm kernel version too old: "
371 "We expect API version %d or newer, but got "
372 "version %d\n", EXPECTED_KVM_API_VERSION, r);
373 goto out_close;
375 if (r > EXPECTED_KVM_API_VERSION) {
376 fprintf(stderr, "kvm userspace version too old\n");
377 goto out_close;
379 kvm_abi = r;
380 kvm_page_size = getpagesize();
381 kvm_state = qemu_mallocz(sizeof(*kvm_state));
382 kvm_context = &kvm_state->kvm_context;
384 kvm_state->fd = fd;
385 kvm_state->vmfd = -1;
386 kvm_context->opaque = cpu_single_env;
387 kvm_context->dirty_pages_log_all = 0;
388 kvm_context->no_irqchip_creation = 0;
389 kvm_context->no_pit_creation = 0;
391 #ifdef KVM_CAP_SET_GUEST_DEBUG
392 QTAILQ_INIT(&kvm_state->kvm_sw_breakpoints);
393 #endif
395 gsi_count = kvm_get_gsi_count(kvm_context);
396 if (gsi_count > 0) {
397 int gsi_bits, i;
399 /* Round up so we can search ints using ffs */
400 gsi_bits = ALIGN(gsi_count, 32);
401 kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
402 kvm_context->max_gsi = gsi_bits;
404 /* Mark any over-allocated bits as already in use */
405 for (i = gsi_count; i < gsi_bits; i++)
406 set_gsi(kvm_context, i);
409 pthread_mutex_lock(&qemu_mutex);
410 return kvm_create_context();
412 out_close:
413 close(fd);
414 return -1;
417 static void kvm_finalize(KVMState *s)
419 /* FIXME
420 if (kvm->vcpu_fd[0] != -1)
421 close(kvm->vcpu_fd[0]);
422 if (kvm->vm_fd != -1)
423 close(kvm->vm_fd);
425 close(s->fd);
426 free(s);
429 void kvm_disable_irqchip_creation(kvm_context_t kvm)
431 kvm->no_irqchip_creation = 1;
434 void kvm_disable_pit_creation(kvm_context_t kvm)
436 kvm->no_pit_creation = 1;
439 static void kvm_create_vcpu(CPUState *env, int id)
441 long mmap_size;
442 int r;
444 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
445 if (r < 0) {
446 fprintf(stderr, "kvm_create_vcpu: %m\n");
447 return;
450 env->kvm_fd = r;
451 env->kvm_state = kvm_state;
453 mmap_size = kvm_ioctl(kvm_state, KVM_GET_VCPU_MMAP_SIZE, 0);
454 if (mmap_size < 0) {
455 fprintf(stderr, "get vcpu mmap size: %m\n");
456 goto err_fd;
458 env->kvm_run =
459 mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, env->kvm_fd,
461 if (env->kvm_run == MAP_FAILED) {
462 fprintf(stderr, "mmap vcpu area: %m\n");
463 goto err_fd;
466 return;
467 err_fd:
468 close(env->kvm_fd);
471 static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
473 #ifdef KVM_CAP_SET_BOOT_CPU_ID
474 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
475 if (r > 0)
476 return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id);
477 return -ENOSYS;
478 #else
479 return -ENOSYS;
480 #endif
483 int kvm_create_vm(kvm_context_t kvm)
485 int fd;
486 #ifdef KVM_CAP_IRQ_ROUTING
487 kvm->irq_routes = qemu_mallocz(sizeof(*kvm->irq_routes));
488 kvm->nr_allocated_irq_routes = 0;
489 #endif
491 fd = kvm_ioctl(kvm_state, KVM_CREATE_VM, 0);
492 if (fd < 0) {
493 fprintf(stderr, "kvm_create_vm: %m\n");
494 return -1;
496 kvm_state->vmfd = fd;
497 return 0;
500 static int kvm_create_default_phys_mem(kvm_context_t kvm,
501 unsigned long phys_mem_bytes,
502 void **vm_mem)
504 #ifdef KVM_CAP_USER_MEMORY
505 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
506 if (r > 0)
507 return 0;
508 fprintf(stderr,
509 "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
510 #else
511 #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
512 #endif
513 return -1;
516 void kvm_create_irqchip(kvm_context_t kvm)
518 int r;
520 kvm->irqchip_in_kernel = 0;
521 #ifdef KVM_CAP_IRQCHIP
522 if (!kvm->no_irqchip_creation) {
523 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
524 if (r > 0) { /* kernel irqchip supported */
525 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_IRQCHIP);
526 if (r >= 0) {
527 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
528 #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
529 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
530 KVM_CAP_IRQ_INJECT_STATUS);
531 if (r > 0)
532 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
533 #endif
534 kvm->irqchip_in_kernel = 1;
535 } else
536 fprintf(stderr, "Create kernel PIC irqchip failed\n");
539 #endif
540 kvm_state->irqchip_in_kernel = kvm->irqchip_in_kernel;
543 int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
545 int r;
547 r = kvm_create_vm(kvm);
548 if (r < 0)
549 return r;
550 r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
551 if (r < 0)
552 return r;
553 init_slots();
554 r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
555 if (r < 0)
556 return r;
557 kvm_create_irqchip(kvm);
559 return 0;
563 int kvm_register_phys_mem(kvm_context_t kvm,
564 unsigned long phys_start, void *userspace_addr,
565 unsigned long len, int log)
568 struct kvm_userspace_memory_region memory = {
569 .memory_size = len,
570 .guest_phys_addr = phys_start,
571 .userspace_addr = (unsigned long) (uintptr_t) userspace_addr,
572 .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
574 int r;
576 memory.slot = get_free_slot(kvm);
577 DPRINTF
578 ("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %x\n",
579 memory.guest_phys_addr, memory.memory_size, memory.userspace_addr,
580 memory.slot, memory.flags);
581 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
582 if (r < 0) {
583 fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(-r));
584 return -1;
586 register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
587 memory.userspace_addr, memory.flags);
588 return 0;
592 /* destroy/free a whole slot.
593 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
595 void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
596 unsigned long len)
598 int slot;
599 int r;
600 struct kvm_userspace_memory_region memory = {
601 .memory_size = 0,
602 .guest_phys_addr = phys_start,
603 .userspace_addr = 0,
604 .flags = 0,
607 slot = get_slot(phys_start);
609 if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
610 fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n", __FUNCTION__,
611 slot);
612 return;
614 if (phys_start != slots[slot].phys_addr) {
615 fprintf(stderr,
616 "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
617 __FUNCTION__, phys_start, slots[slot].phys_addr);
618 phys_start = slots[slot].phys_addr;
621 memory.slot = slot;
622 DPRINTF("slot %d start %llx len %llx flags %x\n",
623 memory.slot, memory.guest_phys_addr, memory.memory_size,
624 memory.flags);
625 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
626 if (r < 0) {
627 fprintf(stderr, "destroy_userspace_phys_mem: %s", strerror(-r));
628 return;
631 free_slot(memory.slot);
634 void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr,
635 unsigned long size)
638 int slot = get_container_slot(phys_addr, size);
640 if (slot != -1) {
641 DPRINTF("Unregistering memory region %" PRIx64 " (%lx)\n", phys_addr, size);
642 kvm_destroy_phys_mem(kvm, phys_addr, size);
643 return;
647 static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
649 int r;
650 struct kvm_dirty_log log = {
651 .slot = slot,
654 log.dirty_bitmap = buf;
656 r = kvm_vm_ioctl(kvm_state, ioctl_num, &log);
657 if (r < 0)
658 return r;
659 return 0;
662 int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
664 int slot;
666 slot = get_slot(phys_addr);
667 return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
670 int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
671 unsigned long len, void *opaque,
672 int (*cb)(unsigned long start,
673 unsigned long len, void *bitmap,
674 void *opaque))
676 int i;
677 int r;
678 unsigned long end_addr = phys_addr + len;
679 void *buf;
681 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
682 if ((slots[i].len && (uint64_t) slots[i].phys_addr >= phys_addr)
683 && ((uint64_t) slots[i].phys_addr + slots[i].len <= end_addr)) {
684 buf = qemu_malloc(BITMAP_SIZE(slots[i].len));
685 r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
686 if (r) {
687 qemu_free(buf);
688 return r;
690 r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
691 qemu_free(buf);
692 if (r)
693 return r;
696 return 0;
699 #ifdef KVM_CAP_IRQCHIP
701 int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
703 struct kvm_irq_level event;
704 int r;
706 if (!kvm->irqchip_in_kernel)
707 return 0;
708 event.level = level;
709 event.irq = irq;
710 r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event);
711 if (r < 0)
712 perror("kvm_set_irq_level");
714 if (status) {
715 #ifdef KVM_CAP_IRQ_INJECT_STATUS
716 *status =
717 (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
718 #else
719 *status = 1;
720 #endif
723 return 1;
726 int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
728 int r;
730 if (!kvm->irqchip_in_kernel)
731 return 0;
732 r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
733 if (r < 0) {
734 perror("kvm_get_irqchip");
736 return r;
739 int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
741 int r;
743 if (!kvm->irqchip_in_kernel)
744 return 0;
745 r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
746 if (r < 0) {
747 perror("kvm_set_irqchip");
749 return r;
752 #endif
754 static int handle_debug(CPUState *env)
756 #ifdef KVM_CAP_SET_GUEST_DEBUG
757 struct kvm_run *run = env->kvm_run;
759 return kvm_debug(env, &run->debug.arch);
760 #else
761 return 0;
762 #endif
765 int kvm_get_regs(CPUState *env, struct kvm_regs *regs)
767 return kvm_vcpu_ioctl(env, KVM_GET_REGS, regs);
770 int kvm_set_regs(CPUState *env, struct kvm_regs *regs)
772 return kvm_vcpu_ioctl(env, KVM_SET_REGS, regs);
775 int kvm_get_fpu(CPUState *env, struct kvm_fpu *fpu)
777 return kvm_vcpu_ioctl(env, KVM_GET_FPU, fpu);
780 int kvm_set_fpu(CPUState *env, struct kvm_fpu *fpu)
782 return kvm_vcpu_ioctl(env, KVM_SET_FPU, fpu);
785 int kvm_get_sregs(CPUState *env, struct kvm_sregs *sregs)
787 return kvm_vcpu_ioctl(env, KVM_GET_SREGS, sregs);
790 int kvm_set_sregs(CPUState *env, struct kvm_sregs *sregs)
792 return kvm_vcpu_ioctl(env, KVM_SET_SREGS, sregs);
795 #ifdef KVM_CAP_MP_STATE
796 int kvm_get_mpstate(CPUState *env, struct kvm_mp_state *mp_state)
798 int r;
800 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
801 if (r > 0)
802 return kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, mp_state);
803 return -ENOSYS;
806 int kvm_set_mpstate(CPUState *env, struct kvm_mp_state *mp_state)
808 int r;
810 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
811 if (r > 0)
812 return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, mp_state);
813 return -ENOSYS;
815 #endif
817 static int handle_mmio(CPUState *env)
819 unsigned long addr = env->kvm_run->mmio.phys_addr;
820 struct kvm_run *kvm_run = env->kvm_run;
821 void *data = kvm_run->mmio.data;
823 /* hack: Red Hat 7.1 generates these weird accesses. */
824 if ((addr > 0xa0000 - 4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
825 return 0;
827 cpu_physical_memory_rw(addr, data, kvm_run->mmio.len, kvm_run->mmio.is_write);
828 return 0;
831 int handle_io_window(kvm_context_t kvm)
833 return 1;
836 int handle_shutdown(kvm_context_t kvm, CPUState *env)
838 /* stop the current vcpu from going back to guest mode */
839 env->stopped = 1;
841 qemu_system_reset_request();
842 return 1;
845 static inline void push_nmi(kvm_context_t kvm)
847 #ifdef KVM_CAP_USER_NMI
848 kvm_arch_push_nmi(kvm->opaque);
849 #endif /* KVM_CAP_USER_NMI */
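/*
 * post_kvm_run()/pre_kvm_run() bracket the KVM_RUN ioctl in kvm_run().
 * pre_kvm_run() calls kvm_arch_pre_run() first; only after that does it
 * write back any register state userspace has modified (regs_modified)
 * with kvm_arch_put_registers(), then drops qemu_mutex for the duration
 * of the ioctl.  post_kvm_run() reacquires the mutex, runs the arch
 * post-run hook, and restores cpu_single_env.
 */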
852 void post_kvm_run(kvm_context_t kvm, CPUState *env)
854 pthread_mutex_lock(&qemu_mutex);
855 kvm_arch_post_run(env, env->kvm_run);
856 cpu_single_env = env;
859 int pre_kvm_run(kvm_context_t kvm, CPUState *env)
861 kvm_arch_pre_run(env, env->kvm_run);
863 if (env->kvm_cpu_state.regs_modified) {
864 kvm_arch_put_registers(env);
865 env->kvm_cpu_state.regs_modified = 0;
868 pthread_mutex_unlock(&qemu_mutex);
869 return 0;
872 int kvm_is_ready_for_interrupt_injection(CPUState *env)
874 return env->kvm_run->ready_for_interrupt_injection;
877 static int kvm_handle_internal_error(kvm_context_t kvm,
878 CPUState *env,
879 struct kvm_run *run)
881 fprintf(stderr, "KVM internal error. Suberror: %d\n",
882 run->internal.suberror);
883 #ifdef KVM_CAP_INTERNAL_ERROR_DATA
884 if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
885 int i;
887 for (i = 0; i < run->internal.ndata; ++i) {
888 fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
889 i, (uint64_t)run->internal.data[i]);
892 #endif
893 kvm_show_regs(env);
894 if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION)
895 fprintf(stderr, "emulation failure, check dmesg for details\n");
896 vm_stop(0);
897 return 1;
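/*
 * kvm_run(): per-vcpu execution loop.  Pushes any pending NMI, requests an
 * interrupt window when the irqchip is in userspace, flushes state via
 * pre_kvm_run(), issues the KVM_RUN ioctl, drains the coalesced-MMIO ring,
 * and then dispatches on run->exit_reason.  It loops ("goto again") for as
 * long as the exit handlers return 0.
 */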
900 int kvm_run(CPUState *env)
902 int r;
903 kvm_context_t kvm = &env->kvm_state->kvm_context;
904 struct kvm_run *run = env->kvm_run;
905 int fd = env->kvm_fd;
907 again:
908 push_nmi(kvm);
909 #if !defined(__s390__)
910 if (!kvm->irqchip_in_kernel)
911 run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
912 #endif
914 r = pre_kvm_run(kvm, env);
915 if (r)
916 return r;
917 r = ioctl(fd, KVM_RUN, 0);
919 if (r == -1 && errno != EINTR && errno != EAGAIN) {
920 r = -errno;
921 post_kvm_run(kvm, env);
922 fprintf(stderr, "kvm_run: %s\n", strerror(-r));
923 return r;
926 post_kvm_run(kvm, env);
928 #if defined(KVM_CAP_COALESCED_MMIO)
929 if (kvm_state->coalesced_mmio) {
930 struct kvm_coalesced_mmio_ring *ring =
931 (void *) run + kvm_state->coalesced_mmio * PAGE_SIZE;
932 while (ring->first != ring->last) {
933 cpu_physical_memory_rw(ring->coalesced_mmio[ring->first].phys_addr,
934 &ring->coalesced_mmio[ring->first].data[0],
935 ring->coalesced_mmio[ring->first].len, 1);
936 smp_wmb();
937 ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
940 #endif
942 #if !defined(__s390__)
943 if (r == -1) {
944 r = handle_io_window(kvm);
945 goto more;
947 #endif
948 if (1) {
949 switch (run->exit_reason) {
950 case KVM_EXIT_UNKNOWN:
951 r = handle_unhandled(run->hw.hardware_exit_reason);
952 break;
953 case KVM_EXIT_FAIL_ENTRY:
954 r = handle_unhandled(run->fail_entry.hardware_entry_failure_reason);
955 break;
956 case KVM_EXIT_EXCEPTION:
957 fprintf(stderr, "exception %d (%x)\n", run->ex.exception,
958 run->ex.error_code);
959 kvm_show_regs(env);
960 kvm_show_code(env);
961 abort();
962 break;
963 case KVM_EXIT_IO:
964 r = kvm_handle_io(run->io.port,
965 (uint8_t *)run + run->io.data_offset,
966 run->io.direction,
967 run->io.size,
968 run->io.count);
969 break;
970 case KVM_EXIT_DEBUG:
971 r = handle_debug(env);
972 break;
973 case KVM_EXIT_MMIO:
974 r = handle_mmio(env);
975 break;
976 case KVM_EXIT_HLT:
977 r = kvm_arch_halt(env);
978 break;
979 case KVM_EXIT_IRQ_WINDOW_OPEN:
980 break;
981 case KVM_EXIT_SHUTDOWN:
982 r = handle_shutdown(kvm, env);
983 break;
984 #if defined(__s390__)
985 case KVM_EXIT_S390_SIEIC:
986 r = kvm_s390_handle_intercept(kvm, env, run);
987 break;
988 case KVM_EXIT_S390_RESET:
989 r = kvm_s390_handle_reset(kvm, env, run);
990 break;
991 #endif
992 case KVM_EXIT_INTERNAL_ERROR:
993 r = kvm_handle_internal_error(kvm, env, run);
994 break;
995 default:
996 if (kvm_arch_run(env)) {
997 fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason);
998 kvm_show_regs(env);
999 abort();
1001 break;
1004 more:
1005 if (!r)
1006 goto again;
1007 return r;
1010 int kvm_inject_irq(CPUState *env, unsigned irq)
1012 struct kvm_interrupt intr;
1014 intr.irq = irq;
1015 return kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
1018 #ifdef KVM_CAP_SET_GUEST_DEBUG
1019 int kvm_set_guest_debug(CPUState *env, struct kvm_guest_debug *dbg)
1021 return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, dbg);
1023 #endif
1025 int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset)
1027 struct kvm_signal_mask *sigmask;
1028 int r;
1030 if (!sigset) {
1031 return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);
1033 sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));
1035 sigmask->len = 8;
1036 memcpy(sigmask->sigset, sigset, sizeof(*sigset));
1037 r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
1038 free(sigmask);
1039 return r;
1042 int kvm_pit_in_kernel(kvm_context_t kvm)
1044 return kvm->pit_in_kernel;
1047 int kvm_inject_nmi(CPUState *env)
1049 #ifdef KVM_CAP_USER_NMI
1050 return kvm_vcpu_ioctl(env, KVM_NMI);
1051 #else
1052 return -ENOSYS;
1053 #endif
1056 int kvm_init_coalesced_mmio(kvm_context_t kvm)
1058 int r = 0;
1059 kvm_state->coalesced_mmio = 0;
1060 #ifdef KVM_CAP_COALESCED_MMIO
1061 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
1062 if (r > 0) {
1063 kvm_state->coalesced_mmio = r;
1064 return 0;
1066 #endif
1067 return r;
1070 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
1071 int kvm_assign_pci_device(kvm_context_t kvm,
1072 struct kvm_assigned_pci_dev *assigned_dev)
1074 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
1077 static int kvm_old_assign_irq(kvm_context_t kvm,
1078 struct kvm_assigned_irq *assigned_irq)
1080 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
1083 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
1084 int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
1086 int ret;
1088 ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
1089 if (ret > 0) {
1090 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq);
1093 return kvm_old_assign_irq(kvm, assigned_irq);
1096 int kvm_deassign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
1098 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
1100 #else
1101 int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
1103 return kvm_old_assign_irq(kvm, assigned_irq);
1105 #endif
1106 #endif
1108 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1109 int kvm_deassign_pci_device(kvm_context_t kvm,
1110 struct kvm_assigned_pci_dev *assigned_dev)
1112 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
1114 #endif
1116 int kvm_destroy_memory_region_works(kvm_context_t kvm)
1118 int ret = 0;
1120 #ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
1121 ret =
1122 kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
1123 KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
1124 if (ret <= 0)
1125 ret = 0;
1126 #endif
1127 return ret;
1130 int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
1132 #ifdef KVM_CAP_REINJECT_CONTROL
1133 int r;
1134 struct kvm_reinject_control control;
1136 control.pit_reinject = pit_reinject;
1138 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
1139 if (r > 0) {
1140 return kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control);
1142 #endif
1143 return -ENOSYS;
1146 int kvm_has_gsi_routing(kvm_context_t kvm)
1148 int r = 0;
1150 #ifdef KVM_CAP_IRQ_ROUTING
1151 r = kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
1152 #endif
1153 return r;
1156 int kvm_get_gsi_count(kvm_context_t kvm)
1158 #ifdef KVM_CAP_IRQ_ROUTING
1159 return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
1160 #else
1161 return -EINVAL;
1162 #endif
1165 int kvm_clear_gsi_routes(kvm_context_t kvm)
1167 #ifdef KVM_CAP_IRQ_ROUTING
1168 kvm->irq_routes->nr = 0;
1169 return 0;
1170 #else
1171 return -EINVAL;
1172 #endif
1175 int kvm_add_routing_entry(kvm_context_t kvm,
1176 struct kvm_irq_routing_entry *entry)
1178 #ifdef KVM_CAP_IRQ_ROUTING
1179 struct kvm_irq_routing *z;
1180 struct kvm_irq_routing_entry *new;
1181 int n, size;
1183 if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
1184 n = kvm->nr_allocated_irq_routes * 2;
1185 if (n < 64)
1186 n = 64;
1187 size = sizeof(struct kvm_irq_routing);
1188 size += n * sizeof(*new);
1189 z = realloc(kvm->irq_routes, size);
1190 if (!z)
1191 return -ENOMEM;
1192 kvm->nr_allocated_irq_routes = n;
1193 kvm->irq_routes = z;
1195 n = kvm->irq_routes->nr++;
1196 new = &kvm->irq_routes->entries[n];
1197 memset(new, 0, sizeof(*new));
1198 new->gsi = entry->gsi;
1199 new->type = entry->type;
1200 new->flags = entry->flags;
1201 new->u = entry->u;
1203 set_gsi(kvm, entry->gsi);
1205 return 0;
1206 #else
1207 return -ENOSYS;
1208 #endif
1211 int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1213 #ifdef KVM_CAP_IRQ_ROUTING
1214 struct kvm_irq_routing_entry e;
1216 e.gsi = gsi;
1217 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1218 e.flags = 0;
1219 e.u.irqchip.irqchip = irqchip;
1220 e.u.irqchip.pin = pin;
1221 return kvm_add_routing_entry(kvm, &e);
1222 #else
1223 return -ENOSYS;
1224 #endif
1227 int kvm_del_routing_entry(kvm_context_t kvm,
1228 struct kvm_irq_routing_entry *entry)
1230 #ifdef KVM_CAP_IRQ_ROUTING
1231 struct kvm_irq_routing_entry *e, *p;
1232 int i, gsi, found = 0;
1234 gsi = entry->gsi;
1236 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1237 e = &kvm->irq_routes->entries[i];
1238 if (e->type == entry->type && e->gsi == gsi) {
1239 switch (e->type) {
1240 case KVM_IRQ_ROUTING_IRQCHIP:{
1241 if (e->u.irqchip.irqchip ==
1242 entry->u.irqchip.irqchip
1243 && e->u.irqchip.pin == entry->u.irqchip.pin) {
1244 p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
1245 *e = *p;
1246 found = 1;
1248 break;
1250 case KVM_IRQ_ROUTING_MSI:{
1251 if (e->u.msi.address_lo ==
1252 entry->u.msi.address_lo
1253 && e->u.msi.address_hi ==
1254 entry->u.msi.address_hi
1255 && e->u.msi.data == entry->u.msi.data) {
1256 p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
1257 *e = *p;
1258 found = 1;
1260 break;
1262 default:
1263 break;
1265 if (found) {
1266 /* If there are no other users of this GSI
1267 * mark it available in the bitmap */
1268 for (i = 0; i < kvm->irq_routes->nr; i++) {
1269 e = &kvm->irq_routes->entries[i];
1270 if (e->gsi == gsi)
1271 break;
1273 if (i == kvm->irq_routes->nr)
1274 clear_gsi(kvm, gsi);
1276 return 0;
1280 return -ESRCH;
1281 #else
1282 return -ENOSYS;
1283 #endif
1286 int kvm_update_routing_entry(kvm_context_t kvm,
1287 struct kvm_irq_routing_entry *entry,
1288 struct kvm_irq_routing_entry *newentry)
1290 #ifdef KVM_CAP_IRQ_ROUTING
1291 struct kvm_irq_routing_entry *e;
1292 int i;
1294 if (entry->gsi != newentry->gsi || entry->type != newentry->type) {
1295 return -EINVAL;
1298 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1299 e = &kvm->irq_routes->entries[i];
1300 if (e->type != entry->type || e->gsi != entry->gsi) {
1301 continue;
1303 switch (e->type) {
1304 case KVM_IRQ_ROUTING_IRQCHIP:
1305 if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
1306 e->u.irqchip.pin == entry->u.irqchip.pin) {
1307 memcpy(&e->u.irqchip, &newentry->u.irqchip,
1308 sizeof e->u.irqchip);
1309 return 0;
1311 break;
1312 case KVM_IRQ_ROUTING_MSI:
1313 if (e->u.msi.address_lo == entry->u.msi.address_lo &&
1314 e->u.msi.address_hi == entry->u.msi.address_hi &&
1315 e->u.msi.data == entry->u.msi.data) {
1316 memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
1317 return 0;
1319 break;
1320 default:
1321 break;
1324 return -ESRCH;
1325 #else
1326 return -ENOSYS;
1327 #endif
1330 int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1332 #ifdef KVM_CAP_IRQ_ROUTING
1333 struct kvm_irq_routing_entry e;
1335 e.gsi = gsi;
1336 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1337 e.flags = 0;
1338 e.u.irqchip.irqchip = irqchip;
1339 e.u.irqchip.pin = pin;
1340 return kvm_del_routing_entry(kvm, &e);
1341 #else
1342 return -ENOSYS;
1343 #endif
1346 int kvm_commit_irq_routes(kvm_context_t kvm)
1348 #ifdef KVM_CAP_IRQ_ROUTING
1349 kvm->irq_routes->flags = 0;
1350 return kvm_vm_ioctl(kvm_state, KVM_SET_GSI_ROUTING, kvm->irq_routes);
1351 #else
1352 return -ENOSYS;
1353 #endif
1356 int kvm_get_irq_route_gsi(kvm_context_t kvm)
1358 int i, bit;
1359 uint32_t *buf = kvm->used_gsi_bitmap;
1361 /* Return the lowest unused GSI in the bitmap */
1362 for (i = 0; i < kvm->max_gsi / 32; i++) {
1363 bit = ffs(~buf[i]);
1364 if (!bit)
1365 continue;
1367 return bit - 1 + i * 32;
1370 return -ENOSPC;
1373 #ifdef KVM_CAP_DEVICE_MSIX
1374 int kvm_assign_set_msix_nr(kvm_context_t kvm,
1375 struct kvm_assigned_msix_nr *msix_nr)
1377 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
1380 int kvm_assign_set_msix_entry(kvm_context_t kvm,
1381 struct kvm_assigned_msix_entry *entry)
1383 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
1385 #endif
1387 #if defined(KVM_CAP_IRQFD) && defined(CONFIG_EVENTFD)
1389 #include <sys/eventfd.h>
1391 static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
1393 struct kvm_irqfd data = {
1394 .fd = fd,
1395 .gsi = gsi,
1396 .flags = flags,
1399 return kvm_vm_ioctl(kvm_state, KVM_IRQFD, &data);
1402 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1404 int r;
1405 int fd;
1407 if (!kvm_check_extension(kvm_state, KVM_CAP_IRQFD))
1408 return -ENOENT;
1410 fd = eventfd(0, 0);
1411 if (fd < 0)
1412 return -errno;
1414 r = _kvm_irqfd(kvm, fd, gsi, 0);
1415 if (r < 0) {
1416 close(fd);
1417 return -errno;
1420 return fd;
1423 #else /* KVM_CAP_IRQFD */
1425 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1427 return -ENOSYS;
1430 #endif /* KVM_CAP_IRQFD */
1431 static inline unsigned long kvm_get_thread_id(void)
1433 return syscall(SYS_gettid);
1436 static void qemu_cond_wait(pthread_cond_t *cond)
1438 CPUState *env = cpu_single_env;
1440 pthread_cond_wait(cond, &qemu_mutex);
1441 cpu_single_env = env;
1444 static void sig_ipi_handler(int n)
1448 static void hardware_memory_error(void)
1450 fprintf(stderr, "Hardware memory error!\n");
1451 exit(1);
1454 static void sigbus_reraise(void)
1456 sigset_t set;
1457 struct sigaction action;
1459 memset(&action, 0, sizeof(action));
1460 action.sa_handler = SIG_DFL;
1461 if (!sigaction(SIGBUS, &action, NULL)) {
1462 raise(SIGBUS);
1463 sigemptyset(&set);
1464 sigaddset(&set, SIGBUS);
1465 sigprocmask(SIG_UNBLOCK, &set, NULL);
1467 perror("Failed to re-raise SIGBUS!\n");
1468 abort();
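/*
 * sigbus_handler() runs on the io thread, dispatched from the signalfd.
 * For an action-optional (BUS_MCEERR_AO) machine check that hits guest RAM
 * it injects an MCE into the guest; an AO error in memory used by QEMU
 * itself is only reported.  Otherwise AO errors are ignored, BUS_MCEERR_AR
 * errors terminate QEMU, and anything else is re-raised as a fatal SIGBUS.
 */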
1471 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
1472 void *ctx)
1474 #if defined(KVM_CAP_MCE) && defined(TARGET_I386)
1475 if (first_cpu->mcg_cap && siginfo->ssi_addr
1476 && siginfo->ssi_code == BUS_MCEERR_AO) {
1477 uint64_t status;
1478 unsigned long paddr;
1479 CPUState *cenv;
1481 /* Hope we are lucky for AO MCE */
1482 if (do_qemu_ram_addr_from_host((void *)(intptr_t)siginfo->ssi_addr,
1483 &paddr)) {
1484 fprintf(stderr, "Hardware memory error for memory used by "
1485 "QEMU itself instead of guest system!: %llx\n",
1486 (unsigned long long)siginfo->ssi_addr);
1487 return;
1489 status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
1490 | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
1491 | 0xc0;
1492 kvm_inject_x86_mce(first_cpu, 9, status,
1493 MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr,
1494 (MCM_ADDR_PHYS << 6) | 0xc, 1);
1495 for (cenv = first_cpu->next_cpu; cenv != NULL; cenv = cenv->next_cpu)
1496 kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
1497 MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1);
1498 } else
1499 #endif
1501 if (siginfo->ssi_code == BUS_MCEERR_AO)
1502 return;
1503 else if (siginfo->ssi_code == BUS_MCEERR_AR)
1504 hardware_memory_error();
1505 else
1506 sigbus_reraise();
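/*
 * on_vcpu(): run func(data) on the thread that owns env.  When called from
 * that thread the function runs directly; otherwise a qemu_work_item is
 * appended to the vcpu's work queue, the vcpu thread is kicked with SIG_IPI,
 * and the caller waits on qemu_work_cond (under qemu_mutex) until it is done.
 */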
1510 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
1512 struct qemu_work_item wi;
1514 if (env == current_env) {
1515 func(data);
1516 return;
1519 wi.func = func;
1520 wi.data = data;
1521 if (!env->kvm_cpu_state.queued_work_first)
1522 env->kvm_cpu_state.queued_work_first = &wi;
1523 else
1524 env->kvm_cpu_state.queued_work_last->next = &wi;
1525 env->kvm_cpu_state.queued_work_last = &wi;
1526 wi.next = NULL;
1527 wi.done = false;
1529 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1530 while (!wi.done)
1531 qemu_cond_wait(&qemu_work_cond);
1534 void kvm_arch_get_registers(CPUState *env)
1536 kvm_arch_save_regs(env);
1539 static void do_kvm_cpu_synchronize_state(void *_env)
1541 CPUState *env = _env;
1542 if (!env->kvm_cpu_state.regs_modified) {
1543 kvm_arch_get_registers(env);
1544 env->kvm_cpu_state.regs_modified = 1;
1548 void kvm_cpu_synchronize_state(CPUState *env)
1550 if (!env->kvm_cpu_state.regs_modified)
1551 on_vcpu(env, do_kvm_cpu_synchronize_state, env);
1554 static void inject_interrupt(void *data)
1556 cpu_interrupt(current_env, (long) data);
1559 void kvm_inject_interrupt(CPUState *env, int mask)
1561 on_vcpu(env, inject_interrupt, (void *) (long) mask);
1564 void kvm_update_interrupt_request(CPUState *env)
1566 int signal = 0;
1568 if (env) {
1569 if (!current_env || !current_env->created)
1570 signal = 1;
1572 * Testing for created here is really redundant
1574 if (current_env && current_env->created &&
1575 env != current_env && !env->kvm_cpu_state.signalled)
1576 signal = 1;
1578 if (signal) {
1579 env->kvm_cpu_state.signalled = 1;
1580 if (env->kvm_cpu_state.thread)
1581 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1586 static void kvm_do_load_registers(void *_env)
1588 CPUState *env = _env;
1590 kvm_arch_load_regs(env);
1593 void kvm_load_registers(CPUState *env)
1595 if (kvm_enabled() && qemu_system_ready)
1596 on_vcpu(env, kvm_do_load_registers, env);
1599 static void kvm_do_save_registers(void *_env)
1601 CPUState *env = _env;
1603 kvm_arch_save_regs(env);
1606 void kvm_save_registers(CPUState *env)
1608 if (kvm_enabled())
1609 on_vcpu(env, kvm_do_save_registers, env);
1612 static void kvm_do_load_mpstate(void *_env)
1614 CPUState *env = _env;
1616 kvm_arch_load_mpstate(env);
1619 void kvm_load_mpstate(CPUState *env)
1621 if (kvm_enabled() && qemu_system_ready)
1622 on_vcpu(env, kvm_do_load_mpstate, env);
1625 static void kvm_do_save_mpstate(void *_env)
1627 CPUState *env = _env;
1629 kvm_arch_save_mpstate(env);
1630 #ifdef KVM_CAP_MP_STATE
1631 if (kvm_irqchip_in_kernel())
1632 env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
1633 #endif
1636 void kvm_save_mpstate(CPUState *env)
1638 if (kvm_enabled())
1639 on_vcpu(env, kvm_do_save_mpstate, env);
1642 int kvm_cpu_exec(CPUState *env)
1644 int r;
1646 r = kvm_run(env);
1647 if (r < 0) {
1648 printf("kvm_run returned %d\n", r);
1649 vm_stop(0);
1652 return 0;
1655 static int is_cpu_stopped(CPUState *env)
1657 return !vm_running || env->stopped;
1660 static void flush_queued_work(CPUState *env)
1662 struct qemu_work_item *wi;
1664 if (!env->kvm_cpu_state.queued_work_first)
1665 return;
1667 while ((wi = env->kvm_cpu_state.queued_work_first)) {
1668 env->kvm_cpu_state.queued_work_first = wi->next;
1669 wi->func(wi->data);
1670 wi->done = true;
1672 env->kvm_cpu_state.queued_work_last = NULL;
1673 pthread_cond_broadcast(&qemu_work_cond);
1676 static void kvm_on_sigbus(CPUState *env, siginfo_t *siginfo)
1678 #if defined(KVM_CAP_MCE) && defined(TARGET_I386)
1679 struct kvm_x86_mce mce = {
1680 .bank = 9,
1682 unsigned long paddr;
1683 int r;
1685 if (env->mcg_cap && siginfo->si_addr
1686 && (siginfo->si_code == BUS_MCEERR_AR
1687 || siginfo->si_code == BUS_MCEERR_AO)) {
1688 if (siginfo->si_code == BUS_MCEERR_AR) {
1689 /* Fake an Intel architectural Data Load SRAR UCR */
1690 mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
1691 | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
1692 | MCI_STATUS_AR | 0x134;
1693 mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
1694 mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
1695 } else {
1696 /* Fake an Intel architectural Memory scrubbing UCR */
1697 mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
1698 | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
1699 | 0xc0;
1700 mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
1701 mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
1703 if (do_qemu_ram_addr_from_host((void *)siginfo->si_addr, &paddr)) {
1704 fprintf(stderr, "Hardware memory error for memory used by "
1705 "QEMU itself instaed of guest system!\n");
1706 /* Hope we are lucky for AO MCE */
1707 if (siginfo->si_code == BUS_MCEERR_AO)
1708 return;
1709 else
1710 hardware_memory_error();
1712 mce.addr = paddr;
1713 r = kvm_set_mce(env, &mce);
1714 if (r < 0) {
1715 fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
1716 abort();
1718 } else
1719 #endif
1721 if (siginfo->si_code == BUS_MCEERR_AO)
1722 return;
1723 else if (siginfo->si_code == BUS_MCEERR_AR)
1724 hardware_memory_error();
1725 else
1726 sigbus_reraise();
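/*
 * kvm_main_loop_wait(): with qemu_mutex dropped, wait up to `timeout`
 * milliseconds for SIG_IPI or SIGBUS via sigtimedwait(), treating SIGBUS as
 * a machine check.  After reacquiring the mutex it flushes queued work for
 * this vcpu and honours any pending stop request.
 */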
1730 static void kvm_main_loop_wait(CPUState *env, int timeout)
1732 struct timespec ts;
1733 int r, e;
1734 siginfo_t siginfo;
1735 sigset_t waitset;
1736 sigset_t chkset;
1738 ts.tv_sec = timeout / 1000;
1739 ts.tv_nsec = (timeout % 1000) * 1000000;
1740 sigemptyset(&waitset);
1741 sigaddset(&waitset, SIG_IPI);
1742 sigaddset(&waitset, SIGBUS);
1744 do {
1745 pthread_mutex_unlock(&qemu_mutex);
1747 r = sigtimedwait(&waitset, &siginfo, &ts);
1748 e = errno;
1750 pthread_mutex_lock(&qemu_mutex);
1752 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
1753 printf("sigtimedwait: %s\n", strerror(e));
1754 exit(1);
1757 switch (r) {
1758 case SIGBUS:
1759 kvm_on_sigbus(env, &siginfo);
1760 break;
1761 default:
1762 break;
1765 r = sigpending(&chkset);
1766 if (r == -1) {
1767 printf("sigpending: %s\n", strerror(errno));
1768 exit(1);
1770 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1772 cpu_single_env = env;
1773 flush_queued_work(env);
1775 if (env->stop) {
1776 env->stop = 0;
1777 env->stopped = 1;
1778 pthread_cond_signal(&qemu_pause_cond);
1781 env->kvm_cpu_state.signalled = 0;
1784 static int all_threads_paused(void)
1786 CPUState *penv = first_cpu;
1788 while (penv) {
1789 if (penv->stop)
1790 return 0;
1791 penv = (CPUState *) penv->next_cpu;
1794 return 1;
1797 static void pause_all_threads(void)
1799 CPUState *penv = first_cpu;
1801 while (penv) {
1802 if (penv != cpu_single_env) {
1803 penv->stop = 1;
1804 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1805 } else {
1806 penv->stop = 0;
1807 penv->stopped = 1;
1808 cpu_exit(penv);
1810 penv = (CPUState *) penv->next_cpu;
1813 while (!all_threads_paused())
1814 qemu_cond_wait(&qemu_pause_cond);
1817 static void resume_all_threads(void)
1819 CPUState *penv = first_cpu;
1821 assert(!cpu_single_env);
1823 while (penv) {
1824 penv->stop = 0;
1825 penv->stopped = 0;
1826 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1827 penv = (CPUState *) penv->next_cpu;
1831 static void kvm_vm_state_change_handler(void *context, int running, int reason)
1833 if (running)
1834 resume_all_threads();
1835 else
1836 pause_all_threads();
1839 static void setup_kernel_sigmask(CPUState *env)
1841 sigset_t set;
1843 sigemptyset(&set);
1844 sigaddset(&set, SIGUSR2);
1845 sigaddset(&set, SIGIO);
1846 sigaddset(&set, SIGALRM);
1847 sigprocmask(SIG_BLOCK, &set, NULL);
1849 sigprocmask(SIG_BLOCK, NULL, &set);
1850 sigdelset(&set, SIG_IPI);
1851 sigdelset(&set, SIGBUS);
1853 kvm_set_signal_mask(env, &set);
1856 static void qemu_kvm_system_reset(void)
1858 CPUState *penv = first_cpu;
1860 pause_all_threads();
1862 qemu_system_reset();
1864 while (penv) {
1865 kvm_arch_cpu_reset(penv);
1866 penv = (CPUState *) penv->next_cpu;
1869 resume_all_threads();
1872 static void process_irqchip_events(CPUState *env)
1874 kvm_arch_process_irqchip_events(env);
1875 if (kvm_arch_has_work(env))
1876 env->halted = 0;
1879 static int kvm_main_loop_cpu(CPUState *env)
1881 while (1) {
1882 int run_cpu = !is_cpu_stopped(env);
1883 if (run_cpu && !kvm_irqchip_in_kernel()) {
1884 process_irqchip_events(env);
1885 run_cpu = !env->halted;
1887 if (run_cpu) {
1888 kvm_cpu_exec(env);
1889 kvm_main_loop_wait(env, 0);
1890 } else {
1891 kvm_main_loop_wait(env, 1000);
1894 pthread_mutex_unlock(&qemu_mutex);
1895 return 0;
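/*
 * ap_main_loop(): entry point of each vcpu thread.  It creates the kernel
 * vcpu, applies ioperm settings for assigned devices, sets up the signal
 * mask used while in KVM_RUN, initialises and loads the vcpu registers,
 * signals the creator via qemu_vcpu_cond, waits for machine initialisation,
 * and finally enters kvm_main_loop_cpu().
 */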
1898 static void *ap_main_loop(void *_env)
1900 CPUState *env = _env;
1901 sigset_t signals;
1902 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1903 struct ioperm_data *data = NULL;
1904 #endif
1906 current_env = env;
1907 env->thread_id = kvm_get_thread_id();
1908 sigfillset(&signals);
1909 sigprocmask(SIG_BLOCK, &signals, NULL);
1910 kvm_create_vcpu(env, env->cpu_index);
1912 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1913 /* do ioperm for io ports of assigned devices */
1914 QLIST_FOREACH(data, &ioperm_head, entries)
1915 on_vcpu(env, kvm_arch_do_ioperm, data);
1916 #endif
1918 setup_kernel_sigmask(env);
1920 pthread_mutex_lock(&qemu_mutex);
1921 cpu_single_env = env;
1923 kvm_arch_init_vcpu(env);
1925 kvm_arch_load_regs(env);
1927 /* signal VCPU creation */
1928 current_env->created = 1;
1929 pthread_cond_signal(&qemu_vcpu_cond);
1931 /* and wait for machine initialization */
1932 while (!qemu_system_ready)
1933 qemu_cond_wait(&qemu_system_cond);
1935 /* re-initialize cpu_single_env after re-acquiring qemu_mutex */
1936 cpu_single_env = env;
1938 kvm_main_loop_cpu(env);
1939 return NULL;
1942 void kvm_init_vcpu(CPUState *env)
1944 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
1946 while (env->created == 0)
1947 qemu_cond_wait(&qemu_vcpu_cond);
1950 int kvm_vcpu_inited(CPUState *env)
1952 return env->created;
1955 #ifdef TARGET_I386
1956 void kvm_hpet_disable_kpit(void)
1958 struct kvm_pit_state2 ps2;
1960 kvm_get_pit2(kvm_context, &ps2);
1961 ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
1962 kvm_set_pit2(kvm_context, &ps2);
1965 void kvm_hpet_enable_kpit(void)
1967 struct kvm_pit_state2 ps2;
1969 kvm_get_pit2(kvm_context, &ps2);
1970 ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY;
1971 kvm_set_pit2(kvm_context, &ps2);
1973 #endif
1975 int kvm_init_ap(void)
1977 struct sigaction action;
1979 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
1981 signal(SIG_IPI, sig_ipi_handler);
1983 memset(&action, 0, sizeof(action));
1984 action.sa_flags = SA_SIGINFO;
1985 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
1986 sigaction(SIGBUS, &action, NULL);
1987 prctl(PR_MCE_KILL, 1, 1);
1988 return 0;
1991 void qemu_kvm_notify_work(void)
1993 uint64_t value = 1;
1994 char buffer[8];
1995 size_t offset = 0;
1997 if (io_thread_fd == -1)
1998 return;
2000 memcpy(buffer, &value, sizeof(value));
2002 while (offset < 8) {
2003 ssize_t len;
2005 len = write(io_thread_fd, buffer + offset, 8 - offset);
2006 if (len == -1 && errno == EINTR)
2007 continue;
2009 /* In case we have a pipe, there is no reason to insist on writing
2010 * 8 bytes
2012 if (len == -1 && errno == EAGAIN)
2013 break;
2015 if (len <= 0)
2016 break;
2018 offset += len;
2022 /* If we have signalfd, we mask out the signals we want to handle and then
2023 * use signalfd to listen for them. We rely on whatever the current signal
2024 * handler is to dispatch the signals when we receive them.
2027 static void sigfd_handler(void *opaque)
2029 int fd = (unsigned long) opaque;
2030 struct qemu_signalfd_siginfo info;
2031 struct sigaction action;
2032 ssize_t len;
2034 while (1) {
2035 do {
2036 len = read(fd, &info, sizeof(info));
2037 } while (len == -1 && errno == EINTR);
2039 if (len == -1 && errno == EAGAIN)
2040 break;
2042 if (len != sizeof(info)) {
2043 printf("read from sigfd returned %zd: %m\n", len);
2044 return;
2047 sigaction(info.ssi_signo, NULL, &action);
2048 if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction)
2049 action.sa_sigaction(info.ssi_signo,
2050 (siginfo_t *)&info, NULL);
2051 else if (action.sa_handler)
2052 action.sa_handler(info.ssi_signo);
2057 /* Used to break IO thread out of select */
2058 static void io_thread_wakeup(void *opaque)
2060 int fd = (unsigned long) opaque;
2061 char buffer[4096];
2063 /* Drain the pipe (or eventfd) */
2064 while (1) {
2065 ssize_t len;
2067 len = read(fd, buffer, sizeof(buffer));
2068 if (len == -1 && errno == EINTR)
2069 continue;
2071 if (len <= 0)
2072 break;
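/*
 * kvm_main_loop(): the io thread's main loop.  It wires up an eventfd (or
 * pipe) so vcpu threads can wake the select() inside main_loop_wait(),
 * routes blocked signals through a signalfd, and then services shutdown,
 * powerdown, reset and guest-debug requests until a shutdown breaks the loop.
 */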
2076 int kvm_main_loop(void)
2078 int fds[2];
2079 sigset_t mask;
2080 int sigfd;
2082 io_thread = pthread_self();
2083 qemu_system_ready = 1;
2085 if (qemu_eventfd(fds) == -1) {
2086 fprintf(stderr, "failed to create eventfd\n");
2087 return -errno;
2090 fcntl(fds[0], F_SETFL, O_NONBLOCK);
2091 fcntl(fds[1], F_SETFL, O_NONBLOCK);
2093 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
2094 (void *)(unsigned long) fds[0]);
2096 io_thread_fd = fds[1];
2098 sigemptyset(&mask);
2099 sigaddset(&mask, SIGIO);
2100 sigaddset(&mask, SIGALRM);
2101 sigaddset(&mask, SIGBUS);
2102 sigprocmask(SIG_BLOCK, &mask, NULL);
2104 sigfd = qemu_signalfd(&mask);
2105 if (sigfd == -1) {
2106 fprintf(stderr, "failed to create signalfd\n");
2107 return -errno;
2110 fcntl(sigfd, F_SETFL, O_NONBLOCK);
2112 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
2113 (void *)(unsigned long) sigfd);
2115 pthread_cond_broadcast(&qemu_system_cond);
2117 io_thread_sigfd = sigfd;
2118 cpu_single_env = NULL;
2120 while (1) {
2121 main_loop_wait(1000);
2122 if (qemu_shutdown_requested()) {
2123 if (qemu_no_shutdown()) {
2124 vm_stop(0);
2125 } else
2126 break;
2127 } else if (qemu_powerdown_requested())
2128 qemu_irq_raise(qemu_system_powerdown);
2129 else if (qemu_reset_requested())
2130 qemu_kvm_system_reset();
2131 else if (kvm_debug_cpu_requested) {
2132 gdb_set_stop_cpu(kvm_debug_cpu_requested);
2133 vm_stop(EXCP_DEBUG);
2134 kvm_debug_cpu_requested = NULL;
2138 pause_all_threads();
2139 pthread_mutex_unlock(&qemu_mutex);
2141 return 0;
2144 #ifdef TARGET_I386
2145 static int destroy_region_works = 0;
2146 #endif
2149 #if !defined(TARGET_I386)
2150 int kvm_arch_init_irq_routing(void)
2152 return 0;
2154 #endif
2156 extern int no_hpet;
2158 static int kvm_create_context(void)
2160 int r;
2162 if (!kvm_irqchip) {
2163 kvm_disable_irqchip_creation(kvm_context);
2165 if (!kvm_pit) {
2166 kvm_disable_pit_creation(kvm_context);
2168 if (kvm_create(kvm_context, 0, NULL) < 0) {
2169 kvm_finalize(kvm_state);
2170 return -1;
2172 r = kvm_arch_qemu_create_context();
2173 if (r < 0)
2174 kvm_finalize(kvm_state);
2175 if (kvm_pit && !kvm_pit_reinject) {
2176 if (kvm_reinject_control(kvm_context, 0)) {
2177 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
2178 return -1;
2181 #ifdef TARGET_I386
2182 destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
2183 #endif
2185 r = kvm_arch_init_irq_routing();
2186 if (r < 0) {
2187 return r;
2190 kvm_init_ap();
2191 if (kvm_irqchip) {
2192 if (!qemu_kvm_has_gsi_routing()) {
2193 irq0override = 0;
2194 #ifdef TARGET_I386
2195 /* if kernel can't do irq routing, interrupt source
2196 * override 0->2 cannot be set up as required by hpet,
2197 * so disable hpet.
2199 no_hpet = 1;
2200 } else if (!qemu_kvm_has_pit_state2()) {
2201 no_hpet = 1;
2203 #else
2205 #endif
2208 return 0;
2211 #ifdef TARGET_I386
2212 static int must_use_aliases_source(target_phys_addr_t addr)
2214 if (destroy_region_works)
2215 return false;
2216 if (addr == 0xa0000 || addr == 0xa8000)
2217 return true;
2218 return false;
2221 static int must_use_aliases_target(target_phys_addr_t addr)
2223 if (destroy_region_works)
2224 return false;
2225 if (addr >= 0xe0000000 && addr < 0x100000000ull)
2226 return true;
2227 return false;
2230 static struct mapping {
2231 target_phys_addr_t phys;
2232 ram_addr_t ram;
2233 ram_addr_t len;
2234 } mappings[50];
2235 static int nr_mappings;
2237 static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
2239 struct mapping *p;
2241 for (p = mappings; p < mappings + nr_mappings; ++p) {
2242 if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
2243 return p;
2246 return NULL;
2249 static struct mapping *find_mapping(target_phys_addr_t start_addr)
2251 struct mapping *p;
2253 for (p = mappings; p < mappings + nr_mappings; ++p) {
2254 if (p->phys <= start_addr && start_addr < p->phys + p->len) {
2255 return p;
2258 return NULL;
2261 static void drop_mapping(target_phys_addr_t start_addr)
2263 struct mapping *p = find_mapping(start_addr);
2265 if (p)
2266 *p = mappings[--nr_mappings];
2268 #endif
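/*
 * kvm_set_phys_mem(): called when QEMU's physical memory map changes.
 * Non-RAM areas cause any overlapping KVM slots to be unregistered; RAM is
 * registered as a user memory slot.  On i386, aliases are maintained for the
 * VGA window and the 0xe0000000-0xffffffff range when the kernel cannot
 * destroy memory regions (see must_use_aliases_source/_target above).
 */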
2270 void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
2271 ram_addr_t phys_offset)
2273 int r = 0;
2274 unsigned long area_flags;
2275 #ifdef TARGET_I386
2276 struct mapping *p;
2277 #endif
2279 if (start_addr + size > phys_ram_size) {
2280 phys_ram_size = start_addr + size;
2283 phys_offset &= ~IO_MEM_ROM;
2284 area_flags = phys_offset & ~TARGET_PAGE_MASK;
2286 if (area_flags != IO_MEM_RAM) {
2287 #ifdef TARGET_I386
2288 if (must_use_aliases_source(start_addr)) {
2289 kvm_destroy_memory_alias(kvm_context, start_addr);
2290 return;
2292 if (must_use_aliases_target(start_addr))
2293 return;
2294 #endif
2295 while (size > 0) {
2296 p = find_mapping(start_addr);
2297 if (p) {
2298 kvm_unregister_memory_area(kvm_context, p->phys, p->len);
2299 drop_mapping(p->phys);
2301 start_addr += TARGET_PAGE_SIZE;
2302 if (size > TARGET_PAGE_SIZE) {
2303 size -= TARGET_PAGE_SIZE;
2304 } else {
2305 size = 0;
2308 return;
2311 r = kvm_is_containing_region(kvm_context, start_addr, size);
2312 if (r)
2313 return;
2315 if (area_flags >= TLB_MMIO)
2316 return;
2318 #ifdef TARGET_I386
2319 if (must_use_aliases_source(start_addr)) {
2320 p = find_ram_mapping(phys_offset);
2321 if (p) {
2322 kvm_create_memory_alias(kvm_context, start_addr, size,
2323 p->phys + (phys_offset - p->ram));
2325 return;
2327 #endif
2329 r = kvm_register_phys_mem(kvm_context, start_addr,
2330 qemu_get_ram_ptr(phys_offset), size, 0);
2331 if (r < 0) {
2332 printf("kvm_cpu_register_physical_memory: failed\n");
2333 exit(1);
2335 #ifdef TARGET_I386
2336 drop_mapping(start_addr);
2337 p = &mappings[nr_mappings++];
2338 p->phys = start_addr;
2339 p->ram = phys_offset;
2340 p->len = size;
2341 #endif
2343 return;
2346 int kvm_setup_guest_memory(void *area, unsigned long size)
2348 int ret = 0;
2350 #ifdef MADV_DONTFORK
2351 if (kvm_enabled() && !kvm_has_sync_mmu())
2352 ret = madvise(area, size, MADV_DONTFORK);
2353 #endif
2355 if (ret)
2356 perror("madvise");
2358 return ret;
2361 #ifdef KVM_CAP_SET_GUEST_DEBUG
2363 struct kvm_set_guest_debug_data {
2364 struct kvm_guest_debug dbg;
2365 int err;
2368 static void kvm_invoke_set_guest_debug(void *data)
2370 struct kvm_set_guest_debug_data *dbg_data = data;
2372 if (cpu_single_env->kvm_cpu_state.regs_modified) {
2373 kvm_arch_put_registers(cpu_single_env);
2374 cpu_single_env->kvm_cpu_state.regs_modified = 0;
2376 dbg_data->err =
2377 kvm_set_guest_debug(cpu_single_env,
2378 &dbg_data->dbg);
2381 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
2383 struct kvm_set_guest_debug_data data;
2385 data.dbg.control = 0;
2386 if (env->singlestep_enabled)
2387 data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
2389 kvm_arch_update_guest_debug(env, &data.dbg);
2390 data.dbg.control |= reinject_trap;
2392 on_vcpu(env, kvm_invoke_set_guest_debug, &data);
2393 return data.err;
2396 #endif
2399 * dirty pages logging
2401 /* FIXME: use unsigned long pointer instead of unsigned char */
2402 unsigned char *kvm_dirty_bitmap = NULL;
2403 int kvm_physical_memory_set_dirty_tracking(int enable)
2405 int r = 0;
2407 if (!kvm_enabled())
2408 return 0;
2410 if (enable) {
2411 if (!kvm_dirty_bitmap) {
2412 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
2413 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
2414 r = kvm_dirty_pages_log_enable_all(kvm_context);
2416 } else {
2417 if (kvm_dirty_bitmap) {
2418 r = kvm_dirty_pages_log_reset(kvm_context);
2419 qemu_free(kvm_dirty_bitmap);
2420 kvm_dirty_bitmap = NULL;
2423 return r;
2426 /* get kvm's dirty pages bitmap and update qemu's */
2427 static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
2428 unsigned char *bitmap,
2429 unsigned long offset,
2430 unsigned long mem_size)
2432 unsigned int i, j, n = 0;
2433 unsigned char c;
2434 unsigned long page_number, addr, addr1;
2435 ram_addr_t ram_addr;
2436 unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + 7) / 8;
2439 * bitmap-traveling is faster than memory-traveling (for addr...)
2440 * especially when most of the memory is not dirty.
2442 for (i = 0; i < len; i++) {
2443 c = bitmap[i];
2444 while (c > 0) {
2445 j = ffsl(c) - 1;
2446 c &= ~(1u << j);
2447 page_number = i * 8 + j;
2448 addr1 = page_number * TARGET_PAGE_SIZE;
2449 addr = offset + addr1;
2450 ram_addr = cpu_get_physical_page_desc(addr);
2451 cpu_physical_memory_set_dirty(ram_addr);
2452 n++;
2455 return 0;
2458 static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
2459 void *bitmap, void *opaque)
2461 return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
2465 * get kvm's dirty pages bitmap and update qemu's
2466 * we only care about physical ram, which resides in slots 0 and 3
2468 int kvm_update_dirty_pages_log(void)
2470 int r = 0;
2473 r = kvm_get_dirty_pages_range(kvm_context, 0, -1UL, NULL,
2474 kvm_get_dirty_bitmap_cb);
2475 return r;
2478 void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
2479 int log)
2481 if (log)
2482 kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
2483 else {
2484 #ifdef TARGET_I386
2485 if (must_use_aliases_target(start))
2486 return;
2487 #endif
2488 kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
2492 #ifdef KVM_CAP_IRQCHIP
2494 int kvm_set_irq(int irq, int level, int *status)
2496 return kvm_set_irq_level(kvm_context, irq, level, status);
2499 #endif
2501 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
2503 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
2506 void kvm_mutex_unlock(void)
2508 assert(!cpu_single_env);
2509 pthread_mutex_unlock(&qemu_mutex);
2512 void kvm_mutex_lock(void)
2514 pthread_mutex_lock(&qemu_mutex);
2515 cpu_single_env = NULL;
2518 void qemu_mutex_unlock_iothread(void)
2520 if (kvm_enabled())
2521 kvm_mutex_unlock();
2524 void qemu_mutex_lock_iothread(void)
2526 if (kvm_enabled())
2527 kvm_mutex_lock();
2530 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2531 void kvm_add_ioperm_data(struct ioperm_data *data)
2533 QLIST_INSERT_HEAD(&ioperm_head, data, entries);
2536 void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
2538 struct ioperm_data *data;
2540 data = QLIST_FIRST(&ioperm_head);
2541 while (data) {
2542 struct ioperm_data *next = QLIST_NEXT(data, entries);
2544 if (data->start_port == start_port && data->num == num) {
2545 QLIST_REMOVE(data, entries);
2546 qemu_free(data);
2549 data = next;
2553 void kvm_ioperm(CPUState *env, void *data)
2555 if (kvm_enabled() && qemu_system_ready)
2556 on_vcpu(env, kvm_arch_do_ioperm, data);
2559 #endif
2561 int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2562 target_phys_addr_t end_addr)
2564 #ifndef TARGET_IA64
2566 #ifdef TARGET_I386
2567 if (must_use_aliases_source(start_addr))
2568 return 0;
2569 #endif
2571 kvm_get_dirty_pages_range(kvm_context, start_addr,
2572 end_addr - start_addr, NULL,
2573 kvm_get_dirty_bitmap_cb);
2574 #endif
2575 return 0;
2578 int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
2580 #ifdef TARGET_I386
2581 if (must_use_aliases_source(phys_addr))
2582 return 0;
2583 #endif
2585 #ifndef TARGET_IA64
2586 kvm_qemu_log_memory(phys_addr, len, 1);
2587 #endif
2588 return 0;
2591 int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
2593 #ifdef TARGET_I386
2594 if (must_use_aliases_source(phys_addr))
2595 return 0;
2596 #endif
2598 #ifndef TARGET_IA64
2599 kvm_qemu_log_memory(phys_addr, len, 0);
2600 #endif
2601 return 0;
2604 int kvm_set_boot_cpu_id(uint32_t id)
2606 return kvm_set_boot_vcpu_id(kvm_context, id);
2609 #ifdef TARGET_I386
2610 #ifdef KVM_CAP_MCE
2611 struct kvm_x86_mce_data {
2612 CPUState *env;
2613 struct kvm_x86_mce *mce;
2614 int abort_on_error;
2617 static void kvm_do_inject_x86_mce(void *_data)
2619 struct kvm_x86_mce_data *data = _data;
2620 int r;
2622 r = kvm_set_mce(data->env, data->mce);
2623 if (r < 0) {
2624 perror("kvm_set_mce FAILED");
2625 if (data->abort_on_error)
2626 abort();
2629 #endif
2631 void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
2632 uint64_t mcg_status, uint64_t addr, uint64_t misc,
2633 int abort_on_error)
2635 #ifdef KVM_CAP_MCE
2636 struct kvm_x86_mce mce = {
2637 .bank = bank,
2638 .status = status,
2639 .mcg_status = mcg_status,
2640 .addr = addr,
2641 .misc = misc,
2643 struct kvm_x86_mce_data data = {
2644 .env = cenv,
2645 .mce = &mce,
2646 .abort_on_error = abort_on_error,
2649 if (!cenv->mcg_cap) {
2650 fprintf(stderr, "MCE support is not enabled!\n");
2651 return;
2653 on_vcpu(cenv, kvm_do_inject_x86_mce, &data);
2654 #else
2655 if (abort_on_error)
2656 abort();
2657 #endif
2659 #endif