reuse kvm_ioctl
[qemu-kvm/fedora.git] / qemu-kvm.c
blob 98cfee0714af82647711b0ae0bbfb517fbf3a04d
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
21 #include "qemu-kvm.h"
22 #include "libkvm.h"
24 #include <pthread.h>
25 #include <sys/utsname.h>
26 #include <sys/syscall.h>
27 #include <sys/mman.h>
28 #include <sys/ioctl.h>
29 #include <signal.h>
31 #define false 0
32 #define true 1
34 #define EXPECTED_KVM_API_VERSION 12
36 #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
37 #error libkvm: userspace and kernel version mismatch
38 #endif
40 int kvm_allowed = 1;
41 int kvm_irqchip = 1;
42 int kvm_pit = 1;
43 int kvm_pit_reinject = 1;
44 int kvm_nested = 0;
47 KVMState *kvm_state;
48 kvm_context_t kvm_context;
50 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
51 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
52 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
53 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
54 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
55 __thread CPUState *current_env;
57 static int qemu_system_ready;
59 #define SIG_IPI (SIGRTMIN+4)
61 pthread_t io_thread;
62 static int io_thread_fd = -1;
63 static int io_thread_sigfd = -1;
65 static CPUState *kvm_debug_cpu_requested;
67 static uint64_t phys_ram_size;
69 /* The list of ioperm_data */
70 static LIST_HEAD(, ioperm_data) ioperm_head;
72 //#define DEBUG_MEMREG
73 #ifdef DEBUG_MEMREG
74 #define DPRINTF(fmt, args...) \
75 do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0)
76 #else
77 #define DPRINTF(fmt, args...) do {} while (0)
78 #endif
80 #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
82 int kvm_abi = EXPECTED_KVM_API_VERSION;
83 int kvm_page_size;
85 #ifdef KVM_CAP_SET_GUEST_DEBUG
86 static int kvm_debug(void *opaque, void *data,
87 struct kvm_debug_exit_arch *arch_info)
89 int handle = kvm_arch_debug(arch_info);
90 CPUState *env = data;
92 if (handle) {
93 kvm_debug_cpu_requested = env;
94 env->kvm_cpu_state.stopped = 1;
96 return handle;
98 #endif
100 static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
102 *data = cpu_inb(0, addr);
103 return 0;
106 static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
108 *data = cpu_inw(0, addr);
109 return 0;
112 static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
114 *data = cpu_inl(0, addr);
115 return 0;
118 #define PM_IO_BASE 0xb000
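/* Port 0xb2 is the APM/SMI command port emulated here: writing 0xf0 or 0xf1
 * clears or sets bit 0 of the PM control register at PM_IO_BASE + 4, which is
 * how the guest firmware turns ACPI (SCI) off or on.  Writes to any other
 * port are forwarded to the regular cpu_out*() handlers. */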
120 static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
122 if (addr == 0xb2) {
123 switch (data) {
124 case 0: {
125 cpu_outb(0, 0xb3, 0);
126 break;
128 case 0xf0: {
129 unsigned x;
131 /* disable acpi */
132 x = cpu_inw(0, PM_IO_BASE + 4);
133 x &= ~1;
134 cpu_outw(0, PM_IO_BASE + 4, x);
135 break;
137 case 0xf1: {
138 unsigned x;
140 /* enable acpi */
141 x = cpu_inw(0, PM_IO_BASE + 4);
142 x |= 1;
143 cpu_outw(0, PM_IO_BASE + 4, x);
144 break;
146 default:
147 break;
149 return 0;
151 cpu_outb(0, addr, data);
152 return 0;
155 static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
157 cpu_outw(0, addr, data);
158 return 0;
161 static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
163 cpu_outl(0, addr, data);
164 return 0;
167 int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
169 cpu_physical_memory_rw(addr, data, len, 0);
170 return 0;
173 int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
175 cpu_physical_memory_rw(addr, data, len, 1);
176 return 0;
179 static int handle_unhandled(uint64_t reason)
181 fprintf(stderr, "kvm: unhandled exit %"PRIx64"\n", reason);
182 return -EINVAL;
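/* used_gsi_bitmap tracks which global system interrupts (GSIs) already have a
 * routing entry.  Each bit corresponds to one GSI; the word is selected by
 * gsi / 32 and the bit within it by gsi % 32. */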
186 static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
188 uint32_t *bitmap = kvm->used_gsi_bitmap;
190 if (gsi < kvm->max_gsi)
191 bitmap[gsi / 32] |= 1U << (gsi % 32);
192 else
193 DPRINTF("Invalid GSI %u\n", gsi);
196 static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
198 uint32_t *bitmap = kvm->used_gsi_bitmap;
200 if (gsi < kvm->max_gsi)
201 bitmap[gsi / 32] &= ~(1U << (gsi % 32));
202 else
203 DPRINTF("Invalid GSI %u\n", gsi);
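/* Userspace mirror of the kernel's memory slot table.  Each slot records the
 * guest-physical range, the backing userspace address and the slot flags, so
 * that later updates (for example toggling dirty logging) can reissue the
 * full KVM_SET_USER_MEMORY_REGION call. */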
206 struct slot_info {
207 unsigned long phys_addr;
208 unsigned long len;
209 unsigned long userspace_addr;
210 unsigned flags;
211 int logging_count;
214 struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
216 static void init_slots(void)
218 int i;
220 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
221 slots[i].len = 0;
224 static int get_free_slot(kvm_context_t kvm)
226 int i;
227 int tss_ext;
229 #if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
230 tss_ext = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
231 #else
232 tss_ext = 0;
233 #endif
236 /* on older kernels where the set tss ioctl is not supported we must save
237 * slot 0 to hold the extended memory, as the vmx will use the last 3
238 * pages of this slot. */
240 if (tss_ext > 0)
241 i = 0;
242 else
243 i = 1;
245 for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
246 if (!slots[i].len)
247 return i;
248 return -1;
251 static void register_slot(int slot, unsigned long phys_addr, unsigned long len,
252 unsigned long userspace_addr, unsigned flags)
254 slots[slot].phys_addr = phys_addr;
255 slots[slot].len = len;
256 slots[slot].userspace_addr = userspace_addr;
257 slots[slot].flags = flags;
260 static void free_slot(int slot)
262 slots[slot].len = 0;
263 slots[slot].logging_count = 0;
266 static int get_slot(unsigned long phys_addr)
268 int i;
270 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) {
271 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
272 (slots[i].phys_addr + slots[i].len-1) >= phys_addr)
273 return i;
275 return -1;
278 /* Returns -1 if this slot is not totally contained on any other,
279 * and the number of the slot otherwise */
280 static int get_container_slot(uint64_t phys_addr, unsigned long size)
282 int i;
284 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i)
285 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
286 (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
287 return i;
288 return -1;
291 int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size)
293 int slot = get_container_slot(phys_addr, size);
294 if (slot == -1)
295 return 0;
296 return 1;
300 /* dirty pages logging control */
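/* kvm_dirty_pages_log_change() recomputes the slot flags as
 * (old_flags & ~mask) | flags and reissues the slot to the kernel only when
 * the flags actually change.  The enable/disable helpers below reference
 * count requests per slot via logging_count. */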
302 static int kvm_dirty_pages_log_change(kvm_context_t kvm,
303 unsigned long phys_addr,
304 unsigned flags,
305 unsigned mask)
307 int r = -1;
308 int slot = get_slot(phys_addr);
310 if (slot == -1) {
311 fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
312 return 1;
315 flags = (slots[slot].flags & ~mask) | flags;
316 if (flags == slots[slot].flags)
317 return 0;
318 slots[slot].flags = flags;
321 struct kvm_userspace_memory_region mem = {
322 .slot = slot,
323 .memory_size = slots[slot].len,
324 .guest_phys_addr = slots[slot].phys_addr,
325 .userspace_addr = slots[slot].userspace_addr,
326 .flags = slots[slot].flags,
330 DPRINTF("slot %d start %llx len %llx flags %x\n",
331 mem.slot,
332 mem.guest_phys_addr,
333 mem.memory_size,
334 mem.flags);
335 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &mem);
336 if (r < 0)
337 fprintf(stderr, "%s: %m\n", __FUNCTION__);
339 return r;
342 static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
343 int (*change)(kvm_context_t kvm,
344 uint64_t start,
345 uint64_t len))
347 int i, r;
349 for (i=r=0; i<KVM_MAX_NUM_MEM_REGIONS && r==0; i++) {
350 if (slots[i].len)
351 r = change(kvm, slots[i].phys_addr, slots[i].len);
353 return r;
356 int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm,
357 uint64_t phys_addr,
358 uint64_t len)
360 int slot = get_slot(phys_addr);
362 DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len);
363 if (slot == -1) {
364 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
365 return -EINVAL;
368 if (slots[slot].logging_count++)
369 return 0;
371 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
372 KVM_MEM_LOG_DIRTY_PAGES,
373 KVM_MEM_LOG_DIRTY_PAGES);
376 int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm,
377 uint64_t phys_addr,
378 uint64_t len)
380 int slot = get_slot(phys_addr);
382 if (slot == -1) {
383 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
384 return -EINVAL;
387 if (--slots[slot].logging_count)
388 return 0;
390 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
391 0,
392 KVM_MEM_LOG_DIRTY_PAGES);
396 /* Enable dirty page logging for all memory regions */
398 int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
400 if (kvm->dirty_pages_log_all)
401 return 0;
402 kvm->dirty_pages_log_all = 1;
403 return kvm_dirty_pages_log_change_all(kvm,
404 kvm_dirty_pages_log_enable_slot);
408 /* Enable dirty page logging only for memory regions that were created with
409 * dirty logging enabled (disable for all other memory regions). */
411 int kvm_dirty_pages_log_reset(kvm_context_t kvm)
413 if (!kvm->dirty_pages_log_all)
414 return 0;
415 kvm->dirty_pages_log_all = 0;
416 return kvm_dirty_pages_log_change_all(kvm,
417 kvm_dirty_pages_log_disable_slot);
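/* kvm_init() opens /dev/kvm, checks that the kernel speaks exactly the
 * expected API version (12), allocates the global KVMState/kvm_context and
 * sizes the GSI bitmap from KVM_CAP_IRQ_ROUTING.  It returns with qemu_mutex
 * held; the VM itself is created later by kvm_create(). */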
421 int kvm_init(int smp_cpus)
423 int fd;
424 int r, gsi_count;
427 fd = open("/dev/kvm", O_RDWR);
428 if (fd == -1) {
429 perror("open /dev/kvm");
430 return -1;
432 r = ioctl(fd, KVM_GET_API_VERSION, 0);
433 if (r == -1) {
434 fprintf(stderr, "kvm kernel version too old: "
435 "KVM_GET_API_VERSION ioctl not supported\n");
436 goto out_close;
438 if (r < EXPECTED_KVM_API_VERSION) {
439 fprintf(stderr, "kvm kernel version too old: "
440 "We expect API version %d or newer, but got "
441 "version %d\n",
442 EXPECTED_KVM_API_VERSION, r);
443 goto out_close;
445 if (r > EXPECTED_KVM_API_VERSION) {
446 fprintf(stderr, "kvm userspace version too old\n");
447 goto out_close;
449 kvm_abi = r;
450 kvm_page_size = getpagesize();
451 kvm_state = qemu_mallocz(sizeof(*kvm_state));
452 kvm_context = &kvm_state->kvm_context;
454 kvm_state->fd = fd;
455 kvm_state->vmfd = -1;
456 kvm_context->opaque = cpu_single_env;
457 kvm_context->dirty_pages_log_all = 0;
458 kvm_context->no_irqchip_creation = 0;
459 kvm_context->no_pit_creation = 0;
461 #ifdef KVM_CAP_SET_GUEST_DEBUG
462 TAILQ_INIT(&kvm_state->kvm_sw_breakpoints);
463 #endif
465 gsi_count = kvm_get_gsi_count(kvm_context);
466 if (gsi_count > 0) {
467 int gsi_bits, i;
469 /* Round up so we can search ints using ffs */
470 gsi_bits = ALIGN(gsi_count, 32);
471 kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
472 kvm_context->max_gsi = gsi_bits;
474 /* Mark any over-allocated bits as already in use */
475 for (i = gsi_count; i < gsi_bits; i++)
476 set_gsi(kvm_context, i);
479 pthread_mutex_lock(&qemu_mutex);
480 return 0;
482 out_close:
483 close(fd);
484 return -1;
487 static void kvm_finalize(KVMState *s)
489 /* FIXME
490 if (kvm->vcpu_fd[0] != -1)
491 close(kvm->vcpu_fd[0]);
492 if (kvm->vm_fd != -1)
493 close(kvm->vm_fd);
494 */
495 close(s->fd);
496 free(s);
499 void kvm_disable_irqchip_creation(kvm_context_t kvm)
501 kvm->no_irqchip_creation = 1;
504 void kvm_disable_pit_creation(kvm_context_t kvm)
506 kvm->no_pit_creation = 1;
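/* Create a vcpu with KVM_CREATE_VCPU and map its shared kvm_run structure.
 * The mapping size comes from KVM_GET_VCPU_MMAP_SIZE on the /dev/kvm fd and
 * can be larger than one page; the coalesced MMIO ring, for instance, lives
 * in the same mapping. */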
509 kvm_vcpu_context_t kvm_create_vcpu(CPUState *env, int id)
511 long mmap_size;
512 int r;
513 kvm_vcpu_context_t vcpu_ctx = qemu_malloc(sizeof(struct kvm_vcpu_context));
514 kvm_context_t kvm = kvm_context;
516 vcpu_ctx->kvm = kvm;
517 vcpu_ctx->id = id;
519 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
520 if (r < 0) {
521 fprintf(stderr, "kvm_create_vcpu: %m\n");
522 goto err;
524 vcpu_ctx->fd = r;
526 env->kvm_fd = r;
527 env->kvm_state = kvm_state;
529 mmap_size = kvm_ioctl(kvm_state, KVM_GET_VCPU_MMAP_SIZE, 0);
530 if (mmap_size < 0) {
531 fprintf(stderr, "get vcpu mmap size: %m\n");
532 goto err_fd;
534 vcpu_ctx->run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED,
535 vcpu_ctx->fd, 0);
536 if (vcpu_ctx->run == MAP_FAILED) {
537 fprintf(stderr, "mmap vcpu area: %m\n");
538 goto err_fd;
540 return vcpu_ctx;
541 err_fd:
542 close(vcpu_ctx->fd);
543 err:
544 free(vcpu_ctx);
545 return NULL;
548 static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
550 #ifdef KVM_CAP_SET_BOOT_CPU_ID
551 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
552 if (r > 0)
553 return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id);
554 return -ENOSYS;
555 #else
556 return -ENOSYS;
557 #endif
560 int kvm_create_vm(kvm_context_t kvm)
562 int fd;
563 #ifdef KVM_CAP_IRQ_ROUTING
564 kvm->irq_routes = qemu_mallocz(sizeof(*kvm->irq_routes));
565 kvm->nr_allocated_irq_routes = 0;
566 #endif
568 fd = kvm_ioctl(kvm_state, KVM_CREATE_VM, 0);
569 if (fd < 0) {
570 fprintf(stderr, "kvm_create_vm: %m\n");
571 return -1;
573 kvm_state->vmfd = fd;
574 return 0;
577 static int kvm_create_default_phys_mem(kvm_context_t kvm,
578 unsigned long phys_mem_bytes,
579 void **vm_mem)
581 #ifdef KVM_CAP_USER_MEMORY
582 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
583 if (r > 0)
584 return 0;
585 fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
586 #else
587 #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
588 #endif
589 return -1;
592 int kvm_check_extension(kvm_context_t kvm, int ext)
594 int ret;
596 ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, ext);
597 if (ret > 0)
598 return ret;
599 return 0;
602 void kvm_create_irqchip(kvm_context_t kvm)
604 int r;
606 kvm->irqchip_in_kernel = 0;
607 #ifdef KVM_CAP_IRQCHIP
608 if (!kvm->no_irqchip_creation) {
609 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
610 if (r > 0) { /* kernel irqchip supported */
611 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_IRQCHIP);
612 if (r >= 0) {
613 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
614 #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
615 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
616 KVM_CAP_IRQ_INJECT_STATUS);
617 if (r > 0)
618 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
619 #endif
620 kvm->irqchip_in_kernel = 1;
622 else
623 fprintf(stderr, "Create kernel PIC irqchip failed\n");
626 #endif
629 int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
631 int r;
633 r = kvm_create_vm(kvm);
634 if (r < 0)
635 return r;
636 r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
637 if (r < 0)
638 return r;
639 init_slots();
640 r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
641 if (r < 0)
642 return r;
643 kvm_create_irqchip(kvm);
645 return 0;
649 int kvm_register_phys_mem(kvm_context_t kvm,
650 unsigned long phys_start, void *userspace_addr,
651 unsigned long len, int log)
654 struct kvm_userspace_memory_region memory = {
655 .memory_size = len,
656 .guest_phys_addr = phys_start,
657 .userspace_addr = (unsigned long)(intptr_t)userspace_addr,
658 .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
660 int r;
662 memory.slot = get_free_slot(kvm);
663 DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n",
664 memory.guest_phys_addr, memory.memory_size,
665 memory.userspace_addr, memory.slot, memory.flags);
666 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
667 if (r < 0) {
668 fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(-r));
669 return -1;
671 register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
672 memory.userspace_addr, memory.flags);
673 return 0;
677 /* destroy/free a whole slot.
678 * phys_start, len and slot are the params passed to kvm_create_phys_mem() */
680 void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
681 unsigned long len)
683 int slot;
684 int r;
685 struct kvm_userspace_memory_region memory = {
686 .memory_size = 0,
687 .guest_phys_addr = phys_start,
688 .userspace_addr = 0,
689 .flags = 0,
692 slot = get_slot(phys_start);
694 if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
695 fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
696 __FUNCTION__, slot);
697 return;
699 if (phys_start != slots[slot].phys_addr) {
700 fprintf(stderr,
701 "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
702 __FUNCTION__, phys_start, slots[slot].phys_addr);
703 phys_start = slots[slot].phys_addr;
706 memory.slot = slot;
707 DPRINTF("slot %d start %llx len %llx flags %x\n",
708 memory.slot,
709 memory.guest_phys_addr,
710 memory.memory_size,
711 memory.flags);
712 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
713 if (r < 0) {
714 fprintf(stderr, "destroy_userspace_phys_mem: %s",
715 strerror(-r));
716 return;
719 free_slot(memory.slot);
722 void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size)
725 int slot = get_container_slot(phys_addr, size);
727 if (slot != -1) {
728 DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size);
729 kvm_destroy_phys_mem(kvm, phys_addr, size);
730 return;
734 static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
736 int r;
737 struct kvm_dirty_log log = {
738 .slot = slot,
741 log.dirty_bitmap = buf;
743 r = kvm_vm_ioctl(kvm_state, ioctl_num, &log);
744 if (r < 0)
745 return r;
746 return 0;
749 int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
751 int slot;
753 slot = get_slot(phys_addr);
754 return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
757 int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
758 unsigned long len, void *opaque,
759 int (*cb)(unsigned long start, unsigned long len,
760 void*bitmap, void *opaque))
762 int i;
763 int r;
764 unsigned long end_addr = phys_addr + len;
765 void *buf;
767 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
768 if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr)
769 && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) {
770 buf = qemu_malloc((slots[i].len / 4096 + 7) / 8 + 2);
771 r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
772 if (r) {
773 qemu_free(buf);
774 return r;
776 r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
777 qemu_free(buf);
778 if (r)
779 return r;
782 return 0;
785 #ifdef KVM_CAP_IRQCHIP
787 int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
789 struct kvm_irq_level event;
790 int r;
792 if (!kvm->irqchip_in_kernel)
793 return 0;
794 event.level = level;
795 event.irq = irq;
796 r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event);
797 if (r < 0)
798 perror("kvm_set_irq_level");
800 if (status) {
801 #ifdef KVM_CAP_IRQ_INJECT_STATUS
802 *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ?
803 1 : event.status;
804 #else
805 *status = 1;
806 #endif
809 return 1;
812 int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
814 int r;
816 if (!kvm->irqchip_in_kernel)
817 return 0;
818 r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
819 if (r < 0) {
820 perror("kvm_get_irqchip");
822 return r;
825 int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
827 int r;
829 if (!kvm->irqchip_in_kernel)
830 return 0;
831 r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
832 if (r < 0) {
833 perror("kvm_set_irqchip");
835 return r;
838 #endif
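/* Decode a KVM_EXIT_IO exit.  The kvm_run structure describes the port,
 * direction, access size and repetition count; the data itself sits at
 * io.data_offset within the same shared mapping, and the pointer is advanced
 * by io.size after each iteration. */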
840 static int handle_io(kvm_vcpu_context_t vcpu)
842 struct kvm_run *run = vcpu->run;
843 kvm_context_t kvm = vcpu->kvm;
844 uint16_t addr = run->io.port;
845 int r;
846 int i;
847 void *p = (void *)run + run->io.data_offset;
849 for (i = 0; i < run->io.count; ++i) {
850 switch (run->io.direction) {
851 case KVM_EXIT_IO_IN:
852 switch (run->io.size) {
853 case 1:
854 r = kvm_inb(kvm->opaque, addr, p);
855 break;
856 case 2:
857 r = kvm_inw(kvm->opaque, addr, p);
858 break;
859 case 4:
860 r = kvm_inl(kvm->opaque, addr, p);
861 break;
862 default:
863 fprintf(stderr, "bad I/O size %d\n", run->io.size);
864 return -EMSGSIZE;
866 break;
867 case KVM_EXIT_IO_OUT:
868 switch (run->io.size) {
869 case 1:
870 r = kvm_outb(kvm->opaque, addr,
871 *(uint8_t *)p);
872 break;
873 case 2:
874 r = kvm_outw(kvm->opaque, addr,
875 *(uint16_t *)p);
876 break;
877 case 4:
878 r = kvm_outl(kvm->opaque, addr,
879 *(uint32_t *)p);
880 break;
881 default:
882 fprintf(stderr, "bad I/O size %d\n", run->io.size);
883 return -EMSGSIZE;
885 break;
886 default:
887 fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
888 return -EPROTO;
891 p += run->io.size;
894 return 0;
897 int handle_debug(kvm_vcpu_context_t vcpu, void *env)
899 #ifdef KVM_CAP_SET_GUEST_DEBUG
900 struct kvm_run *run = vcpu->run;
901 kvm_context_t kvm = vcpu->kvm;
903 return kvm_debug(kvm->opaque, env, &run->debug.arch);
904 #else
905 return 0;
906 #endif
909 int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
911 return ioctl(vcpu->fd, KVM_GET_REGS, regs);
914 int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
916 return ioctl(vcpu->fd, KVM_SET_REGS, regs);
919 int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
921 return ioctl(vcpu->fd, KVM_GET_FPU, fpu);
924 int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
926 return ioctl(vcpu->fd, KVM_SET_FPU, fpu);
929 int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
931 return ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
934 int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
936 return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
939 #ifdef KVM_CAP_MP_STATE
940 int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
942 int r;
944 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
945 if (r > 0)
946 return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state);
947 return -ENOSYS;
950 int kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
952 int r;
954 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
955 if (r > 0)
956 return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
957 return -ENOSYS;
959 #endif
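/* KVM_EXIT_MMIO: the kernel could not complete the access itself, so forward
 * it to QEMU's memory system via cpu_physical_memory_rw() in the direction
 * indicated by mmio.is_write. */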
961 static int handle_mmio(kvm_vcpu_context_t vcpu)
963 unsigned long addr = vcpu->run->mmio.phys_addr;
964 kvm_context_t kvm = vcpu->kvm;
965 struct kvm_run *kvm_run = vcpu->run;
966 void *data = kvm_run->mmio.data;
968 /* hack: Red Hat 7.1 generates these weird accesses. */
969 if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
970 return 0;
972 if (kvm_run->mmio.is_write)
973 return kvm_mmio_write(kvm->opaque, addr, data,
974 kvm_run->mmio.len);
975 else
976 return kvm_mmio_read(kvm->opaque, addr, data,
977 kvm_run->mmio.len);
980 int handle_io_window(kvm_context_t kvm)
982 return 1;
985 int handle_halt(kvm_vcpu_context_t vcpu)
987 return kvm_arch_halt(vcpu->kvm->opaque, vcpu);
990 int handle_shutdown(kvm_context_t kvm, CPUState *env)
992 /* stop the current vcpu from going back to guest mode */
993 env->kvm_cpu_state.stopped = 1;
995 qemu_system_reset_request();
996 return 1;
999 static inline void push_nmi(kvm_context_t kvm)
1001 #ifdef KVM_CAP_USER_NMI
1002 kvm_arch_push_nmi(kvm->opaque);
1003 #endif /* KVM_CAP_USER_NMI */
1006 void post_kvm_run(kvm_context_t kvm, CPUState *env)
1008 pthread_mutex_lock(&qemu_mutex);
1009 kvm_arch_post_kvm_run(kvm->opaque, env);
1012 int pre_kvm_run(kvm_context_t kvm, CPUState *env)
1014 kvm_arch_pre_kvm_run(kvm->opaque, env);
1016 if (env->exit_request)
1017 return 1;
1018 pthread_mutex_unlock(&qemu_mutex);
1019 return 0;
1022 int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu)
1024 return vcpu->run->if_flag;
1027 int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu)
1029 return vcpu->run->ready_for_interrupt_injection;
1032 int kvm_run(kvm_vcpu_context_t vcpu, void *env)
1034 int r;
1035 int fd = vcpu->fd;
1036 struct kvm_run *run = vcpu->run;
1037 kvm_context_t kvm = vcpu->kvm;
1039 again:
1040 push_nmi(kvm);
1041 #if !defined(__s390__)
1042 if (!kvm->irqchip_in_kernel)
1043 run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
1044 #endif
1045 r = pre_kvm_run(kvm, env);
1046 if (r)
1047 return r;
1048 r = ioctl(fd, KVM_RUN, 0);
1050 if (r == -1 && errno != EINTR && errno != EAGAIN) {
1051 r = -errno;
1052 post_kvm_run(kvm, env);
1053 fprintf(stderr, "kvm_run: %s\n", strerror(-r));
1054 return r;
1057 post_kvm_run(kvm, env);
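/* Drain the coalesced MMIO ring: the kernel batches writes to registered
 * zones into a ring located kvm->coalesced_mmio pages past the kvm_run
 * mapping, and userspace replays the entries from 'first' to 'last' here
 * before looking at the exit reason. */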
1059 #if defined(KVM_CAP_COALESCED_MMIO)
1060 if (kvm->coalesced_mmio) {
1061 struct kvm_coalesced_mmio_ring *ring = (void *)run +
1062 kvm->coalesced_mmio * PAGE_SIZE;
1063 while (ring->first != ring->last) {
1064 kvm_mmio_write(kvm->opaque,
1065 ring->coalesced_mmio[ring->first].phys_addr,
1066 &ring->coalesced_mmio[ring->first].data[0],
1067 ring->coalesced_mmio[ring->first].len);
1068 smp_wmb();
1069 ring->first = (ring->first + 1) %
1070 KVM_COALESCED_MMIO_MAX;
1073 #endif
1075 #if !defined(__s390__)
1076 if (r == -1) {
1077 r = handle_io_window(kvm);
1078 goto more;
1080 #endif
1081 if (1) {
1082 switch (run->exit_reason) {
1083 case KVM_EXIT_UNKNOWN:
1084 r = handle_unhandled(run->hw.hardware_exit_reason);
1085 break;
1086 case KVM_EXIT_FAIL_ENTRY:
1087 r = handle_unhandled(run->fail_entry.hardware_entry_failure_reason);
1088 break;
1089 case KVM_EXIT_EXCEPTION:
1090 fprintf(stderr, "exception %d (%x)\n",
1091 run->ex.exception,
1092 run->ex.error_code);
1093 kvm_show_regs(vcpu);
1094 kvm_show_code(vcpu);
1095 abort();
1096 break;
1097 case KVM_EXIT_IO:
1098 r = handle_io(vcpu);
1099 break;
1100 case KVM_EXIT_DEBUG:
1101 r = handle_debug(vcpu, env);
1102 break;
1103 case KVM_EXIT_MMIO:
1104 r = handle_mmio(vcpu);
1105 break;
1106 case KVM_EXIT_HLT:
1107 r = handle_halt(vcpu);
1108 break;
1109 case KVM_EXIT_IRQ_WINDOW_OPEN:
1110 break;
1111 case KVM_EXIT_SHUTDOWN:
1112 r = handle_shutdown(kvm, env);
1113 break;
1114 #if defined(__s390__)
1115 case KVM_EXIT_S390_SIEIC:
1116 r = kvm_s390_handle_intercept(kvm, vcpu,
1117 run);
1118 break;
1119 case KVM_EXIT_S390_RESET:
1120 r = kvm_s390_handle_reset(kvm, vcpu, run);
1121 break;
1122 #endif
1123 default:
1124 if (kvm_arch_run(vcpu)) {
1125 fprintf(stderr, "unhandled vm exit: 0x%x\n",
1126 run->exit_reason);
1127 kvm_show_regs(vcpu);
1128 abort();
1130 break;
1133 more:
1134 if (!r)
1135 goto again;
1136 return r;
1139 int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq)
1141 struct kvm_interrupt intr;
1143 intr.irq = irq;
1144 return ioctl(vcpu->fd, KVM_INTERRUPT, &intr);
1147 #ifdef KVM_CAP_SET_GUEST_DEBUG
1148 int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg)
1150 return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg);
1152 #endif
1154 int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset)
1156 struct kvm_signal_mask *sigmask;
1157 int r;
1159 if (!sigset) {
1160 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL);
1161 if (r == -1)
1162 r = -errno;
1163 return r;
1165 sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));
1167 sigmask->len = 8;
1168 memcpy(sigmask->sigset, sigset, sizeof(*sigset));
1169 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask);
1170 if (r == -1)
1171 r = -errno;
1172 free(sigmask);
1173 return r;
1176 int kvm_irqchip_in_kernel(kvm_context_t kvm)
1178 return kvm->irqchip_in_kernel;
1181 int kvm_pit_in_kernel(kvm_context_t kvm)
1183 return kvm->pit_in_kernel;
1186 int kvm_has_sync_mmu(void)
1188 int r = 0;
1189 #ifdef KVM_CAP_SYNC_MMU
1190 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
1191 #endif
1192 return r;
1195 int kvm_inject_nmi(kvm_vcpu_context_t vcpu)
1197 #ifdef KVM_CAP_USER_NMI
1198 return ioctl(vcpu->fd, KVM_NMI);
1199 #else
1200 return -ENOSYS;
1201 #endif
1204 int kvm_init_coalesced_mmio(kvm_context_t kvm)
1206 int r = 0;
1207 kvm->coalesced_mmio = 0;
1208 #ifdef KVM_CAP_COALESCED_MMIO
1209 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
1210 if (r > 0) {
1211 kvm->coalesced_mmio = r;
1212 return 0;
1214 #endif
1215 return r;
1218 int kvm_coalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
1220 #ifdef KVM_CAP_COALESCED_MMIO
1221 kvm_context_t kvm = kvm_context;
1222 struct kvm_coalesced_mmio_zone zone;
1223 int r;
1225 if (kvm->coalesced_mmio) {
1227 zone.addr = addr;
1228 zone.size = size;
1230 r = kvm_vm_ioctl(kvm_state, KVM_REGISTER_COALESCED_MMIO, &zone);
1231 if (r < 0) {
1232 perror("kvm_register_coalesced_mmio_zone");
1233 return r;
1235 return 0;
1237 #endif
1238 return -ENOSYS;
1241 int kvm_uncoalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
1243 #ifdef KVM_CAP_COALESCED_MMIO
1244 kvm_context_t kvm = kvm_context;
1245 struct kvm_coalesced_mmio_zone zone;
1246 int r;
1248 if (kvm->coalesced_mmio) {
1250 zone.addr = addr;
1251 zone.size = size;
1253 r = kvm_vm_ioctl(kvm_state, KVM_UNREGISTER_COALESCED_MMIO, &zone);
1254 if (r < 0) {
1255 perror("kvm_unregister_coalesced_mmio_zone");
1256 return r;
1258 DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size);
1259 return 0;
1261 #endif
1262 return -ENOSYS;
1265 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
1266 int kvm_assign_pci_device(kvm_context_t kvm,
1267 struct kvm_assigned_pci_dev *assigned_dev)
1269 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
1272 static int kvm_old_assign_irq(kvm_context_t kvm,
1273 struct kvm_assigned_irq *assigned_irq)
1275 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
1278 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
1279 int kvm_assign_irq(kvm_context_t kvm,
1280 struct kvm_assigned_irq *assigned_irq)
1282 int ret;
1284 ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
1285 if (ret > 0) {
1286 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq);
1289 return kvm_old_assign_irq(kvm, assigned_irq);
1292 int kvm_deassign_irq(kvm_context_t kvm,
1293 struct kvm_assigned_irq *assigned_irq)
1295 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
1297 #else
1298 int kvm_assign_irq(kvm_context_t kvm,
1299 struct kvm_assigned_irq *assigned_irq)
1301 return kvm_old_assign_irq(kvm, assigned_irq);
1303 #endif
1304 #endif
1306 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1307 int kvm_deassign_pci_device(kvm_context_t kvm,
1308 struct kvm_assigned_pci_dev *assigned_dev)
1310 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
1312 #endif
1314 int kvm_destroy_memory_region_works(kvm_context_t kvm)
1316 int ret = 0;
1318 #ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
1319 ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
1320 KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
1321 if (ret <= 0)
1322 ret = 0;
1323 #endif
1324 return ret;
1327 int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
1329 #ifdef KVM_CAP_REINJECT_CONTROL
1330 int r;
1331 struct kvm_reinject_control control;
1333 control.pit_reinject = pit_reinject;
1335 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
1336 if (r > 0) {
1337 return kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control);
1339 #endif
1340 return -ENOSYS;
1343 int kvm_has_gsi_routing(kvm_context_t kvm)
1345 int r = 0;
1347 #ifdef KVM_CAP_IRQ_ROUTING
1348 r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
1349 #endif
1350 return r;
1353 int kvm_get_gsi_count(kvm_context_t kvm)
1355 #ifdef KVM_CAP_IRQ_ROUTING
1356 return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
1357 #else
1358 return -EINVAL;
1359 #endif
1362 int kvm_clear_gsi_routes(kvm_context_t kvm)
1364 #ifdef KVM_CAP_IRQ_ROUTING
1365 kvm->irq_routes->nr = 0;
1366 return 0;
1367 #else
1368 return -EINVAL;
1369 #endif
1372 int kvm_add_routing_entry(kvm_context_t kvm,
1373 struct kvm_irq_routing_entry* entry)
1375 #ifdef KVM_CAP_IRQ_ROUTING
1376 struct kvm_irq_routing *z;
1377 struct kvm_irq_routing_entry *new;
1378 int n, size;
1380 if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
1381 n = kvm->nr_allocated_irq_routes * 2;
1382 if (n < 64)
1383 n = 64;
1384 size = sizeof(struct kvm_irq_routing);
1385 size += n * sizeof(*new);
1386 z = realloc(kvm->irq_routes, size);
1387 if (!z)
1388 return -ENOMEM;
1389 kvm->nr_allocated_irq_routes = n;
1390 kvm->irq_routes = z;
1392 n = kvm->irq_routes->nr++;
1393 new = &kvm->irq_routes->entries[n];
1394 memset(new, 0, sizeof(*new));
1395 new->gsi = entry->gsi;
1396 new->type = entry->type;
1397 new->flags = entry->flags;
1398 new->u = entry->u;
1400 set_gsi(kvm, entry->gsi);
1402 return 0;
1403 #else
1404 return -ENOSYS;
1405 #endif
1408 int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1410 #ifdef KVM_CAP_IRQ_ROUTING
1411 struct kvm_irq_routing_entry e;
1413 e.gsi = gsi;
1414 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1415 e.flags = 0;
1416 e.u.irqchip.irqchip = irqchip;
1417 e.u.irqchip.pin = pin;
1418 return kvm_add_routing_entry(kvm, &e);
1419 #else
1420 return -ENOSYS;
1421 #endif
1424 int kvm_del_routing_entry(kvm_context_t kvm,
1425 struct kvm_irq_routing_entry* entry)
1427 #ifdef KVM_CAP_IRQ_ROUTING
1428 struct kvm_irq_routing_entry *e, *p;
1429 int i, gsi, found = 0;
1431 gsi = entry->gsi;
1433 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1434 e = &kvm->irq_routes->entries[i];
1435 if (e->type == entry->type
1436 && e->gsi == gsi) {
1437 switch (e->type)
1439 case KVM_IRQ_ROUTING_IRQCHIP: {
1440 if (e->u.irqchip.irqchip ==
1441 entry->u.irqchip.irqchip
1442 && e->u.irqchip.pin ==
1443 entry->u.irqchip.pin) {
1444 p = &kvm->irq_routes->
1445 entries[--kvm->irq_routes->nr];
1446 *e = *p;
1447 found = 1;
1449 break;
1451 case KVM_IRQ_ROUTING_MSI: {
1452 if (e->u.msi.address_lo ==
1453 entry->u.msi.address_lo
1454 && e->u.msi.address_hi ==
1455 entry->u.msi.address_hi
1456 && e->u.msi.data == entry->u.msi.data) {
1457 p = &kvm->irq_routes->
1458 entries[--kvm->irq_routes->nr];
1459 *e = *p;
1460 found = 1;
1462 break;
1464 default:
1465 break;
1467 if (found) {
1468 /* If there are no other users of this GSI
1469 * mark it available in the bitmap */
1470 for (i = 0; i < kvm->irq_routes->nr; i++) {
1471 e = &kvm->irq_routes->entries[i];
1472 if (e->gsi == gsi)
1473 break;
1475 if (i == kvm->irq_routes->nr)
1476 clear_gsi(kvm, gsi);
1478 return 0;
1482 return -ESRCH;
1483 #else
1484 return -ENOSYS;
1485 #endif
1488 int kvm_update_routing_entry(kvm_context_t kvm,
1489 struct kvm_irq_routing_entry* entry,
1490 struct kvm_irq_routing_entry* newentry)
1492 #ifdef KVM_CAP_IRQ_ROUTING
1493 struct kvm_irq_routing_entry *e;
1494 int i;
1496 if (entry->gsi != newentry->gsi ||
1497 entry->type != newentry->type) {
1498 return -EINVAL;
1501 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1502 e = &kvm->irq_routes->entries[i];
1503 if (e->type != entry->type || e->gsi != entry->gsi) {
1504 continue;
1506 switch (e->type) {
1507 case KVM_IRQ_ROUTING_IRQCHIP:
1508 if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
1509 e->u.irqchip.pin == entry->u.irqchip.pin) {
1510 memcpy(&e->u.irqchip, &entry->u.irqchip, sizeof e->u.irqchip);
1511 return 0;
1513 break;
1514 case KVM_IRQ_ROUTING_MSI:
1515 if (e->u.msi.address_lo == entry->u.msi.address_lo &&
1516 e->u.msi.address_hi == entry->u.msi.address_hi &&
1517 e->u.msi.data == entry->u.msi.data) {
1518 memcpy(&e->u.msi, &entry->u.msi, sizeof e->u.msi);
1519 return 0;
1521 break;
1522 default:
1523 break;
1526 return -ESRCH;
1527 #else
1528 return -ENOSYS;
1529 #endif
1532 int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1534 #ifdef KVM_CAP_IRQ_ROUTING
1535 struct kvm_irq_routing_entry e;
1537 e.gsi = gsi;
1538 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1539 e.flags = 0;
1540 e.u.irqchip.irqchip = irqchip;
1541 e.u.irqchip.pin = pin;
1542 return kvm_del_routing_entry(kvm, &e);
1543 #else
1544 return -ENOSYS;
1545 #endif
1548 int kvm_commit_irq_routes(kvm_context_t kvm)
1550 #ifdef KVM_CAP_IRQ_ROUTING
1551 kvm->irq_routes->flags = 0;
1552 return kvm_vm_ioctl(kvm_state, KVM_SET_GSI_ROUTING, kvm->irq_routes);
1553 #else
1554 return -ENOSYS;
1555 #endif
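/* Illustrative only (not part of the original file): a caller would typically
 * pick a free GSI, add a route and then commit, along the lines of
 *
 *     int gsi = kvm_get_irq_route_gsi(kvm);
 *     if (gsi >= 0 && kvm_add_irq_route(kvm, gsi, KVM_IRQCHIP_IOAPIC, pin) == 0)
 *         kvm_commit_irq_routes(kvm);
 *
 * where pin is whichever IOAPIC input the device uses.  kvm_get_irq_route_gsi()
 * below returns the lowest GSI whose bit is still clear in used_gsi_bitmap,
 * or -ENOSPC when the bitmap is full. */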
1558 int kvm_get_irq_route_gsi(kvm_context_t kvm)
1560 int i, bit;
1561 uint32_t *buf = kvm->used_gsi_bitmap;
1563 /* Return the lowest unused GSI in the bitmap */
1564 for (i = 0; i < kvm->max_gsi / 32; i++) {
1565 bit = ffs(~buf[i]);
1566 if (!bit)
1567 continue;
1569 return bit - 1 + i * 32;
1572 return -ENOSPC;
1575 #ifdef KVM_CAP_DEVICE_MSIX
1576 int kvm_assign_set_msix_nr(kvm_context_t kvm,
1577 struct kvm_assigned_msix_nr *msix_nr)
1579 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
1582 int kvm_assign_set_msix_entry(kvm_context_t kvm,
1583 struct kvm_assigned_msix_entry *entry)
1585 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
1587 #endif
1589 #if defined(KVM_CAP_IRQFD) && defined(CONFIG_eventfd)
1591 #include <sys/eventfd.h>
1593 static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
1595 struct kvm_irqfd data = {
1596 .fd = fd,
1597 .gsi = gsi,
1598 .flags = flags,
1601 return kvm_vm_ioctl(kvm_state, KVM_IRQFD, &data);
1604 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1606 int r;
1607 int fd;
1609 if (!kvm_check_extension(kvm, KVM_CAP_IRQFD))
1610 return -ENOENT;
1612 fd = eventfd(0, 0);
1613 if (fd < 0)
1614 return -errno;
1616 r = _kvm_irqfd(kvm, fd, gsi, 0);
1617 if (r < 0) {
1618 close(fd);
1619 return -errno;
1622 return fd;
1625 #else /* KVM_CAP_IRQFD */
1627 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1629 return -ENOSYS;
1632 #endif /* KVM_CAP_IRQFD */
1633 static inline unsigned long kvm_get_thread_id(void)
1635 return syscall(SYS_gettid);
1638 static void qemu_cond_wait(pthread_cond_t *cond)
1640 CPUState *env = cpu_single_env;
1641 static const struct timespec ts = {
1642 .tv_sec = 0,
1643 .tv_nsec = 100000,
1646 pthread_cond_timedwait(cond, &qemu_mutex, &ts);
1647 cpu_single_env = env;
1650 static void sig_ipi_handler(int n)
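/* on_vcpu() runs func(data) on the vcpu thread that owns env: if we are
 * already on that thread the function is called directly, otherwise the work
 * item is queued, the target thread is kicked with SIG_IPI and we wait on
 * qemu_work_cond until the item is marked done.  sig_ipi_handler is
 * effectively a no-op; the signal only exists to interrupt KVM_RUN. */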
1654 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
1656 struct qemu_work_item wi;
1658 if (env == current_env) {
1659 func(data);
1660 return;
1663 wi.func = func;
1664 wi.data = data;
1665 if (!env->kvm_cpu_state.queued_work_first)
1666 env->kvm_cpu_state.queued_work_first = &wi;
1667 else
1668 env->kvm_cpu_state.queued_work_last->next = &wi;
1669 env->kvm_cpu_state.queued_work_last = &wi;
1670 wi.next = NULL;
1671 wi.done = false;
1673 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1674 while (!wi.done)
1675 qemu_cond_wait(&qemu_work_cond);
1678 static void inject_interrupt(void *data)
1680 cpu_interrupt(current_env, (long)data);
1683 void kvm_inject_interrupt(CPUState *env, int mask)
1685 on_vcpu(env, inject_interrupt, (void *)(long)mask);
1688 void kvm_update_interrupt_request(CPUState *env)
1690 int signal = 0;
1692 if (env) {
1693 if (!current_env || !current_env->kvm_cpu_state.created)
1694 signal = 1;
1696 /* Testing for created here is really redundant */
1698 if (current_env && current_env->kvm_cpu_state.created &&
1699 env != current_env && !env->kvm_cpu_state.signalled)
1700 signal = 1;
1702 if (signal) {
1703 env->kvm_cpu_state.signalled = 1;
1704 if (env->kvm_cpu_state.thread)
1705 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1710 static void kvm_do_load_registers(void *_env)
1712 CPUState *env = _env;
1714 kvm_arch_load_regs(env);
1717 void kvm_load_registers(CPUState *env)
1719 if (kvm_enabled() && qemu_system_ready)
1720 on_vcpu(env, kvm_do_load_registers, env);
1723 static void kvm_do_save_registers(void *_env)
1725 CPUState *env = _env;
1727 kvm_arch_save_regs(env);
1730 void kvm_save_registers(CPUState *env)
1732 if (kvm_enabled())
1733 on_vcpu(env, kvm_do_save_registers, env);
1736 static void kvm_do_load_mpstate(void *_env)
1738 CPUState *env = _env;
1740 kvm_arch_load_mpstate(env);
1743 void kvm_load_mpstate(CPUState *env)
1745 if (kvm_enabled() && qemu_system_ready)
1746 on_vcpu(env, kvm_do_load_mpstate, env);
1749 static void kvm_do_save_mpstate(void *_env)
1751 CPUState *env = _env;
1753 kvm_arch_save_mpstate(env);
1754 env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
1757 void kvm_save_mpstate(CPUState *env)
1759 if (kvm_enabled())
1760 on_vcpu(env, kvm_do_save_mpstate, env);
1763 int kvm_cpu_exec(CPUState *env)
1765 int r;
1767 r = kvm_run(env->kvm_cpu_state.vcpu_ctx, env);
1768 if (r < 0) {
1769 printf("kvm_run returned %d\n", r);
1770 vm_stop(0);
1773 return 0;
1776 static int is_cpu_stopped(CPUState *env)
1778 return !vm_running || env->kvm_cpu_state.stopped;
1781 static void flush_queued_work(CPUState *env)
1783 struct qemu_work_item *wi;
1785 if (!env->kvm_cpu_state.queued_work_first)
1786 return;
1788 while ((wi = env->kvm_cpu_state.queued_work_first)) {
1789 env->kvm_cpu_state.queued_work_first = wi->next;
1790 wi->func(wi->data);
1791 wi->done = true;
1793 env->kvm_cpu_state.queued_work_last = NULL;
1794 pthread_cond_broadcast(&qemu_work_cond);
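/* Per-vcpu idle/wait path: drop qemu_mutex, wait up to 'timeout' milliseconds
 * for a SIG_IPI with sigtimedwait(), then retake the mutex, run any queued
 * work items and honour a pending stop request by signalling qemu_pause_cond. */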
1797 static void kvm_main_loop_wait(CPUState *env, int timeout)
1799 struct timespec ts;
1800 int r, e;
1801 siginfo_t siginfo;
1802 sigset_t waitset;
1804 pthread_mutex_unlock(&qemu_mutex);
1806 ts.tv_sec = timeout / 1000;
1807 ts.tv_nsec = (timeout % 1000) * 1000000;
1808 sigemptyset(&waitset);
1809 sigaddset(&waitset, SIG_IPI);
1811 r = sigtimedwait(&waitset, &siginfo, &ts);
1812 e = errno;
1814 pthread_mutex_lock(&qemu_mutex);
1816 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
1817 printf("sigtimedwait: %s\n", strerror(e));
1818 exit(1);
1821 cpu_single_env = env;
1822 flush_queued_work(env);
1824 if (env->kvm_cpu_state.stop) {
1825 env->kvm_cpu_state.stop = 0;
1826 env->kvm_cpu_state.stopped = 1;
1827 pthread_cond_signal(&qemu_pause_cond);
1830 env->kvm_cpu_state.signalled = 0;
1833 static int all_threads_paused(void)
1835 CPUState *penv = first_cpu;
1837 while (penv) {
1838 if (penv->kvm_cpu_state.stop)
1839 return 0;
1840 penv = (CPUState *)penv->next_cpu;
1843 return 1;
1846 static void pause_all_threads(void)
1848 CPUState *penv = first_cpu;
1850 while (penv) {
1851 if (penv != cpu_single_env) {
1852 penv->kvm_cpu_state.stop = 1;
1853 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1854 } else {
1855 penv->kvm_cpu_state.stop = 0;
1856 penv->kvm_cpu_state.stopped = 1;
1857 cpu_exit(penv);
1859 penv = (CPUState *)penv->next_cpu;
1862 while (!all_threads_paused())
1863 qemu_cond_wait(&qemu_pause_cond);
1866 static void resume_all_threads(void)
1868 CPUState *penv = first_cpu;
1870 assert(!cpu_single_env);
1872 while (penv) {
1873 penv->kvm_cpu_state.stop = 0;
1874 penv->kvm_cpu_state.stopped = 0;
1875 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1876 penv = (CPUState *)penv->next_cpu;
1880 static void kvm_vm_state_change_handler(void *context, int running, int reason)
1882 if (running)
1883 resume_all_threads();
1884 else
1885 pause_all_threads();
1888 static void setup_kernel_sigmask(CPUState *env)
1890 sigset_t set;
1892 sigemptyset(&set);
1893 sigaddset(&set, SIGUSR2);
1894 sigaddset(&set, SIGIO);
1895 sigaddset(&set, SIGALRM);
1896 sigprocmask(SIG_BLOCK, &set, NULL);
1898 sigprocmask(SIG_BLOCK, NULL, &set);
1899 sigdelset(&set, SIG_IPI);
1901 kvm_set_signal_mask(env->kvm_cpu_state.vcpu_ctx, &set);
1904 static void qemu_kvm_system_reset(void)
1906 CPUState *penv = first_cpu;
1908 pause_all_threads();
1910 qemu_system_reset();
1912 while (penv) {
1913 kvm_arch_cpu_reset(penv);
1914 penv = (CPUState *)penv->next_cpu;
1917 resume_all_threads();
1920 static void process_irqchip_events(CPUState *env)
1922 kvm_arch_process_irqchip_events(env);
1923 if (kvm_arch_has_work(env))
1924 env->halted = 0;
1927 static int kvm_main_loop_cpu(CPUState *env)
1929 setup_kernel_sigmask(env);
1931 pthread_mutex_lock(&qemu_mutex);
1933 kvm_qemu_init_env(env);
1934 #ifdef TARGET_I386
1935 kvm_tpr_vcpu_start(env);
1936 #endif
1938 cpu_single_env = env;
1939 kvm_arch_load_regs(env);
1941 while (1) {
1942 int run_cpu = !is_cpu_stopped(env);
1943 if (run_cpu && !kvm_irqchip_in_kernel(kvm_context)) {
1944 process_irqchip_events(env);
1945 run_cpu = !env->halted;
1947 if (run_cpu) {
1948 kvm_main_loop_wait(env, 0);
1949 kvm_cpu_exec(env);
1950 } else {
1951 kvm_main_loop_wait(env, 1000);
1954 pthread_mutex_unlock(&qemu_mutex);
1955 return 0;
1958 static void *ap_main_loop(void *_env)
1960 CPUState *env = _env;
1961 sigset_t signals;
1962 struct ioperm_data *data = NULL;
1964 current_env = env;
1965 env->thread_id = kvm_get_thread_id();
1966 sigfillset(&signals);
1967 sigprocmask(SIG_BLOCK, &signals, NULL);
1968 env->kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(env, env->cpu_index);
1970 #ifdef USE_KVM_DEVICE_ASSIGNMENT
1971 /* do ioperm for io ports of assigned devices */
1972 LIST_FOREACH(data, &ioperm_head, entries)
1973 on_vcpu(env, kvm_arch_do_ioperm, data);
1974 #endif
1976 /* signal VCPU creation */
1977 pthread_mutex_lock(&qemu_mutex);
1978 current_env->kvm_cpu_state.created = 1;
1979 pthread_cond_signal(&qemu_vcpu_cond);
1981 /* and wait for machine initialization */
1982 while (!qemu_system_ready)
1983 qemu_cond_wait(&qemu_system_cond);
1984 pthread_mutex_unlock(&qemu_mutex);
1986 kvm_main_loop_cpu(env);
1987 return NULL;
1990 void kvm_init_vcpu(CPUState *env)
1992 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
1994 while (env->kvm_cpu_state.created == 0)
1995 qemu_cond_wait(&qemu_vcpu_cond);
1998 int kvm_vcpu_inited(CPUState *env)
2000 return env->kvm_cpu_state.created;
2003 #ifdef TARGET_I386
2004 void kvm_hpet_disable_kpit(void)
2006 struct kvm_pit_state2 ps2;
2008 kvm_get_pit2(kvm_context, &ps2);
2009 ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
2010 kvm_set_pit2(kvm_context, &ps2);
2013 void kvm_hpet_enable_kpit(void)
2015 struct kvm_pit_state2 ps2;
2017 kvm_get_pit2(kvm_context, &ps2);
2018 ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY;
2019 kvm_set_pit2(kvm_context, &ps2);
2021 #endif
2023 int kvm_init_ap(void)
2025 #ifdef TARGET_I386
2026 kvm_tpr_opt_setup();
2027 #endif
2028 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
2030 signal(SIG_IPI, sig_ipi_handler);
2031 return 0;
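/* Wake the I/O thread out of its select(): write an 8-byte value to
 * io_thread_fd, which is either an eventfd or the write end of a pipe.  For a
 * pipe a short write is fine, hence the EAGAIN early exit below. */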
2034 void qemu_kvm_notify_work(void)
2036 uint64_t value = 1;
2037 char buffer[8];
2038 size_t offset = 0;
2040 if (io_thread_fd == -1)
2041 return;
2043 memcpy(buffer, &value, sizeof(value));
2045 while (offset < 8) {
2046 ssize_t len;
2048 len = write(io_thread_fd, buffer + offset, 8 - offset);
2049 if (len == -1 && errno == EINTR)
2050 continue;
2052 /* In case we have a pipe, there is no reason to insist on writing
2053 * 8 bytes */
2055 if (len == -1 && errno == EAGAIN)
2056 break;
2058 if (len <= 0)
2059 break;
2061 offset += len;
2065 /* If we have signalfd, we mask out the signals we want to handle and then
2066 * use signalfd to listen for them. We rely on whatever the current signal
2067 * handler is to dispatch the signals when we receive them. */
2070 static void sigfd_handler(void *opaque)
2072 int fd = (unsigned long)opaque;
2073 struct qemu_signalfd_siginfo info;
2074 struct sigaction action;
2075 ssize_t len;
2077 while (1) {
2078 do {
2079 len = read(fd, &info, sizeof(info));
2080 } while (len == -1 && errno == EINTR);
2082 if (len == -1 && errno == EAGAIN)
2083 break;
2085 if (len != sizeof(info)) {
2086 printf("read from sigfd returned %zd: %m\n", len);
2087 return;
2090 sigaction(info.ssi_signo, NULL, &action);
2091 if (action.sa_handler)
2092 action.sa_handler(info.ssi_signo);
2097 /* Used to break IO thread out of select */
2098 static void io_thread_wakeup(void *opaque)
2100 int fd = (unsigned long)opaque;
2101 char buffer[4096];
2103 /* Drain the pipe (or eventfd) */
2104 while (1) {
2105 ssize_t len;
2107 len = read(fd, buffer, sizeof(buffer));
2108 if (len == -1 && errno == EINTR)
2109 continue;
2111 if (len <= 0)
2112 break;
2116 int kvm_main_loop(void)
2118 int fds[2];
2119 sigset_t mask;
2120 int sigfd;
2122 io_thread = pthread_self();
2123 qemu_system_ready = 1;
2125 if (qemu_eventfd(fds) == -1) {
2126 fprintf(stderr, "failed to create eventfd\n");
2127 return -errno;
2130 fcntl(fds[0], F_SETFL, O_NONBLOCK);
2131 fcntl(fds[1], F_SETFL, O_NONBLOCK);
2133 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
2134 (void *)(unsigned long)fds[0]);
2136 io_thread_fd = fds[1];
2138 sigemptyset(&mask);
2139 sigaddset(&mask, SIGIO);
2140 sigaddset(&mask, SIGALRM);
2141 sigprocmask(SIG_BLOCK, &mask, NULL);
2143 sigfd = qemu_signalfd(&mask);
2144 if (sigfd == -1) {
2145 fprintf(stderr, "failed to create signalfd\n");
2146 return -errno;
2149 fcntl(sigfd, F_SETFL, O_NONBLOCK);
2151 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
2152 (void *)(unsigned long)sigfd);
2154 pthread_cond_broadcast(&qemu_system_cond);
2156 io_thread_sigfd = sigfd;
2157 cpu_single_env = NULL;
2159 while (1) {
2160 main_loop_wait(1000);
2161 if (qemu_shutdown_requested()) {
2162 if (qemu_no_shutdown()) {
2163 vm_stop(0);
2164 } else
2165 break;
2166 } else if (qemu_powerdown_requested())
2167 qemu_system_powerdown();
2168 else if (qemu_reset_requested())
2169 qemu_kvm_system_reset();
2170 else if (kvm_debug_cpu_requested) {
2171 gdb_set_stop_cpu(kvm_debug_cpu_requested);
2172 vm_stop(EXCP_DEBUG);
2173 kvm_debug_cpu_requested = NULL;
2177 pause_all_threads();
2178 pthread_mutex_unlock(&qemu_mutex);
2180 return 0;
2183 #ifdef TARGET_I386
2184 static int destroy_region_works = 0;
2185 #endif
2188 #if !defined(TARGET_I386)
2189 int kvm_arch_init_irq_routing(void)
2191 return 0;
2193 #endif
2195 int kvm_qemu_create_context(void)
2197 int r;
2199 if (!kvm_irqchip) {
2200 kvm_disable_irqchip_creation(kvm_context);
2202 if (!kvm_pit) {
2203 kvm_disable_pit_creation(kvm_context);
2205 if (kvm_create(kvm_context, 0, NULL) < 0) {
2206 kvm_finalize(kvm_state);
2207 return -1;
2209 r = kvm_arch_qemu_create_context();
2210 if (r < 0)
2211 kvm_finalize(kvm_state);
2212 if (kvm_pit && !kvm_pit_reinject) {
2213 if (kvm_reinject_control(kvm_context, 0)) {
2214 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
2215 return -1;
2218 #ifdef TARGET_I386
2219 destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
2220 #endif
2222 r = kvm_arch_init_irq_routing();
2223 if (r < 0) {
2224 return r;
2227 return 0;
2230 #ifdef TARGET_I386
2231 static int must_use_aliases_source(target_phys_addr_t addr)
2233 if (destroy_region_works)
2234 return false;
2235 if (addr == 0xa0000 || addr == 0xa8000)
2236 return true;
2237 return false;
2240 static int must_use_aliases_target(target_phys_addr_t addr)
2242 if (destroy_region_works)
2243 return false;
2244 if (addr >= 0xe0000000 && addr < 0x100000000ull)
2245 return true;
2246 return false;
2249 static struct mapping {
2250 target_phys_addr_t phys;
2251 ram_addr_t ram;
2252 ram_addr_t len;
2253 } mappings[50];
2254 static int nr_mappings;
2256 static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
2258 struct mapping *p;
2260 for (p = mappings; p < mappings + nr_mappings; ++p) {
2261 if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
2262 return p;
2265 return NULL;
2268 static struct mapping *find_mapping(target_phys_addr_t start_addr)
2270 struct mapping *p;
2272 for (p = mappings; p < mappings + nr_mappings; ++p) {
2273 if (p->phys <= start_addr && start_addr < p->phys + p->len) {
2274 return p;
2277 return NULL;
2280 static void drop_mapping(target_phys_addr_t start_addr)
2282 struct mapping *p = find_mapping(start_addr);
2284 if (p)
2285 *p = mappings[--nr_mappings];
2287 #endif
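/* Translate a QEMU physical memory registration into KVM memory slots.  RAM
 * gets a KVM_SET_USER_MEMORY_REGION slot backed by qemu_get_ram_ptr(), while
 * ROM and MMIO ranges tear down any overlapping slot instead.  On i386 the
 * must_use_aliases_* helpers work around kernels on which destroying a memory
 * region does not work, by falling back to memory aliases for the VGA window
 * and the high PCI memory area. */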
2289 void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
2290 ram_addr_t phys_offset)
2292 int r = 0;
2293 unsigned long area_flags;
2294 #ifdef TARGET_I386
2295 struct mapping *p;
2296 #endif
2298 if (start_addr + size > phys_ram_size) {
2299 phys_ram_size = start_addr + size;
2302 phys_offset &= ~IO_MEM_ROM;
2303 area_flags = phys_offset & ~TARGET_PAGE_MASK;
2305 if (area_flags != IO_MEM_RAM) {
2306 #ifdef TARGET_I386
2307 if (must_use_aliases_source(start_addr)) {
2308 kvm_destroy_memory_alias(kvm_context, start_addr);
2309 return;
2311 if (must_use_aliases_target(start_addr))
2312 return;
2313 #endif
2314 while (size > 0) {
2315 p = find_mapping(start_addr);
2316 if (p) {
2317 kvm_unregister_memory_area(kvm_context, p->phys, p->len);
2318 drop_mapping(p->phys);
2320 start_addr += TARGET_PAGE_SIZE;
2321 if (size > TARGET_PAGE_SIZE) {
2322 size -= TARGET_PAGE_SIZE;
2323 } else {
2324 size = 0;
2327 return;
2330 r = kvm_is_containing_region(kvm_context, start_addr, size);
2331 if (r)
2332 return;
2334 if (area_flags >= TLB_MMIO)
2335 return;
2337 #ifdef TARGET_I386
2338 if (must_use_aliases_source(start_addr)) {
2339 p = find_ram_mapping(phys_offset);
2340 if (p) {
2341 kvm_create_memory_alias(kvm_context, start_addr, size,
2342 p->phys + (phys_offset - p->ram));
2344 return;
2346 #endif
2348 r = kvm_register_phys_mem(kvm_context, start_addr,
2349 qemu_get_ram_ptr(phys_offset),
2350 size, 0);
2351 if (r < 0) {
2352 printf("kvm_cpu_register_physical_memory: failed\n");
2353 exit(1);
2356 #ifdef TARGET_I386
2357 drop_mapping(start_addr);
2358 p = &mappings[nr_mappings++];
2359 p->phys = start_addr;
2360 p->ram = phys_offset;
2361 p->len = size;
2362 #endif
2364 return;
2367 int kvm_setup_guest_memory(void *area, unsigned long size)
2369 int ret = 0;
2371 #ifdef MADV_DONTFORK
2372 if (kvm_enabled() && !kvm_has_sync_mmu())
2373 ret = madvise(area, size, MADV_DONTFORK);
2374 #endif
2376 if (ret)
2377 perror ("madvise");
2379 return ret;
2382 int kvm_qemu_check_extension(int ext)
2384 return kvm_check_extension(kvm_context, ext);
2387 int kvm_qemu_init_env(CPUState *cenv)
2389 return kvm_arch_qemu_init_env(cenv);
2392 #ifdef KVM_CAP_SET_GUEST_DEBUG
2394 struct kvm_set_guest_debug_data {
2395 struct kvm_guest_debug dbg;
2396 int err;
2399 static void kvm_invoke_set_guest_debug(void *data)
2401 struct kvm_set_guest_debug_data *dbg_data = data;
2403 dbg_data->err = kvm_set_guest_debug(cpu_single_env->kvm_cpu_state.vcpu_ctx,
2404 &dbg_data->dbg);
2407 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
2409 struct kvm_set_guest_debug_data data;
2411 data.dbg.control = 0;
2412 if (env->singlestep_enabled)
2413 data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
2415 kvm_arch_update_guest_debug(env, &data.dbg);
2416 data.dbg.control |= reinject_trap;
2418 on_vcpu(env, kvm_invoke_set_guest_debug, &data);
2419 return data.err;
2422 #endif
2425 /* dirty pages logging */
2427 /* FIXME: use unsigned long pointer instead of unsigned char */
2428 unsigned char *kvm_dirty_bitmap = NULL;
2429 int kvm_physical_memory_set_dirty_tracking(int enable)
2431 int r = 0;
2433 if (!kvm_enabled())
2434 return 0;
2436 if (enable) {
2437 if (!kvm_dirty_bitmap) {
2438 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
2439 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
2440 if (kvm_dirty_bitmap == NULL) {
2441 perror("Failed to allocate dirty pages bitmap");
2442 r = -1;
2444 else {
2445 r = kvm_dirty_pages_log_enable_all(kvm_context);
2449 else {
2450 if (kvm_dirty_bitmap) {
2451 r = kvm_dirty_pages_log_reset(kvm_context);
2452 qemu_free(kvm_dirty_bitmap);
2453 kvm_dirty_bitmap = NULL;
2456 return r;
2459 /* get kvm's dirty pages bitmap and update qemu's */
2460 static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
2461 unsigned char *bitmap,
2462 unsigned long offset,
2463 unsigned long mem_size)
2465 unsigned int i, j, n=0;
2466 unsigned char c;
2467 unsigned long page_number, addr, addr1;
2468 ram_addr_t ram_addr;
2469 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
2472 /* bitmap-traveling is faster than memory-traveling (for addr...)
2473 * especially when most of the memory is not dirty. */
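/* Example of the mapping used below: bit j of bitmap byte i marks page number
 * i * 8 + j as dirty, i.e. guest address offset + (i * 8 + j) * TARGET_PAGE_SIZE. */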
2475 for (i=0; i<len; i++) {
2476 c = bitmap[i];
2477 while (c>0) {
2478 j = ffsl(c) - 1;
2479 c &= ~(1u<<j);
2480 page_number = i * 8 + j;
2481 addr1 = page_number * TARGET_PAGE_SIZE;
2482 addr = offset + addr1;
2483 ram_addr = cpu_get_physical_page_desc(addr);
2484 cpu_physical_memory_set_dirty(ram_addr);
2485 n++;
2488 return 0;
2490 static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
2491 void *bitmap, void *opaque)
2493 return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
2497 /* get kvm's dirty pages bitmap and update qemu's
2498 * we only care about physical ram, which resides in slots 0 and 3 */
2500 int kvm_update_dirty_pages_log(void)
2502 int r = 0;
2505 r = kvm_get_dirty_pages_range(kvm_context, 0, -1UL,
2506 NULL,
2507 kvm_get_dirty_bitmap_cb);
2508 return r;
2511 void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
2512 int log)
2514 if (log)
2515 kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
2516 else {
2517 #ifdef TARGET_I386
2518 if (must_use_aliases_target(start))
2519 return;
2520 #endif
2521 kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
2525 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
2527 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
2528 unsigned int brsize = BITMAP_SIZE(ram_size);
2529 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
2530 unsigned int extra_bytes = (extra_pages +7)/8;
2531 unsigned int hole_start = BITMAP_SIZE(0xa0000);
2532 unsigned int hole_end = BITMAP_SIZE(0xc0000);
2534 memset(bitmap, 0xFF, brsize + extra_bytes);
2535 memset(bitmap + hole_start, 0, hole_end - hole_start);
2536 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
2538 return 0;
2541 #ifdef KVM_CAP_IRQCHIP
2543 int kvm_set_irq(int irq, int level, int *status)
2545 return kvm_set_irq_level(kvm_context, irq, level, status);
2548 #endif
2550 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
2552 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
2555 void kvm_mutex_unlock(void)
2557 assert(!cpu_single_env);
2558 pthread_mutex_unlock(&qemu_mutex);
2561 void kvm_mutex_lock(void)
2563 pthread_mutex_lock(&qemu_mutex);
2564 cpu_single_env = NULL;
2567 #ifdef USE_KVM_DEVICE_ASSIGNMENT
2568 void kvm_add_ioperm_data(struct ioperm_data *data)
2570 LIST_INSERT_HEAD(&ioperm_head, data, entries);
2573 void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
2575 struct ioperm_data *data;
2577 data = LIST_FIRST(&ioperm_head);
2578 while (data) {
2579 struct ioperm_data *next = LIST_NEXT(data, entries);
2581 if (data->start_port == start_port && data->num == num) {
2582 LIST_REMOVE(data, entries);
2583 qemu_free(data);
2586 data = next;
2590 void kvm_ioperm(CPUState *env, void *data)
2592 if (kvm_enabled() && qemu_system_ready)
2593 on_vcpu(env, kvm_arch_do_ioperm, data);
2596 #endif
2598 int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
2600 #ifndef TARGET_IA64
2602 #ifdef TARGET_I386
2603 if (must_use_aliases_source(start_addr))
2604 return 0;
2605 #endif
2607 kvm_get_dirty_pages_range(kvm_context, start_addr, end_addr - start_addr,
2608 NULL, kvm_get_dirty_bitmap_cb);
2609 #endif
2610 return 0;
2613 int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
2615 #ifdef TARGET_I386
2616 if (must_use_aliases_source(phys_addr))
2617 return 0;
2618 #endif
2620 #ifndef TARGET_IA64
2621 kvm_qemu_log_memory(phys_addr, len, 1);
2622 #endif
2623 return 0;
2626 int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
2628 #ifdef TARGET_I386
2629 if (must_use_aliases_source(phys_addr))
2630 return 0;
2631 #endif
2633 #ifndef TARGET_IA64
2634 kvm_qemu_log_memory(phys_addr, len, 0);
2635 #endif
2636 return 0;
2639 void qemu_kvm_cpu_stop(CPUState *env)
2641 if (kvm_enabled())
2642 env->kvm_cpu_state.stopped = 1;
2645 int kvm_set_boot_cpu_id(uint32_t id)
2647 return kvm_set_boot_vcpu_id(kvm_context, id);
2650 #ifdef TARGET_I386
2651 #ifdef KVM_CAP_MCE
2652 struct kvm_x86_mce_data
2654 CPUState *env;
2655 struct kvm_x86_mce *mce;
2658 static void kvm_do_inject_x86_mce(void *_data)
2660 struct kvm_x86_mce_data *data = _data;
2661 int r;
2663 r = kvm_set_mce(data->env->kvm_cpu_state.vcpu_ctx, data->mce);
2664 if (r < 0)
2665 perror("kvm_set_mce FAILED");
2667 #endif
2669 void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
2670 uint64_t mcg_status, uint64_t addr, uint64_t misc)
2672 #ifdef KVM_CAP_MCE
2673 struct kvm_x86_mce mce = {
2674 .bank = bank,
2675 .status = status,
2676 .mcg_status = mcg_status,
2677 .addr = addr,
2678 .misc = misc,
2680 struct kvm_x86_mce_data data = {
2681 .env = cenv,
2682 .mce = &mce,
2685 on_vcpu(cenv, kvm_do_inject_x86_mce, &data);
2686 #endif
2688 #endif