/* [qemu-kvm/fedora.git] / qemu-kvm.c */
/*
 * qemu/kvm integration
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 *
 * Licensed under the terms of the GNU GPL version 2 or higher.
 */
#include "config.h"
#include "config-host.h"

#include <assert.h>
#include <string.h>
#include "hw/hw.h"
#include "sysemu.h"
#include "qemu-common.h"
#include "console.h"
#include "block.h"
#include "compatfd.h"
#include "gdbstub.h"

#include "qemu-kvm.h"
#include "libkvm.h"

#include <pthread.h>
#include <sys/utsname.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <signal.h>
#define false 0
#define true 1

#define EXPECTED_KVM_API_VERSION 12

#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
#error libkvm: userspace and kernel version mismatch
#endif
int kvm_allowed = 1;
int kvm_irqchip = 1;
int kvm_pit = 1;
int kvm_pit_reinject = 1;
int kvm_nested = 0;

KVMState *kvm_state;
kvm_context_t kvm_context;

pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
__thread CPUState *current_env;
static int qemu_system_ready;
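/*
 * SIG_IPI is the inter-thread "kick": on_vcpu() and pause_all_threads()
 * pthread_kill() a vcpu thread with it to force the thread out of
 * KVM_RUN and into kvm_main_loop_wait(), where queued work is flushed.
 */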
#define SIG_IPI (SIGRTMIN+4)
pthread_t io_thread;
static int io_thread_fd = -1;
static int io_thread_sigfd = -1;

static CPUState *kvm_debug_cpu_requested;

static uint64_t phys_ram_size;

/* The list of ioperm_data */
static LIST_HEAD(, ioperm_data) ioperm_head;
//#define DEBUG_MEMREG
#ifdef DEBUG_MEMREG
#define DPRINTF(fmt, args...) \
    do { fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ##args); } while (0)
#else
#define DPRINTF(fmt, args...) do {} while (0)
#endif

#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
int kvm_abi = EXPECTED_KVM_API_VERSION;
int kvm_page_size;
#ifdef KVM_CAP_SET_GUEST_DEBUG
static int kvm_debug(void *opaque, void *data,
                     struct kvm_debug_exit_arch *arch_info)
{
    int handle = kvm_arch_debug(arch_info);
    CPUState *env = data;

    if (handle) {
        kvm_debug_cpu_requested = env;
        env->kvm_cpu_state.stopped = 1;
    }
    return handle;
}
#endif
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    *data = cpu_inb(0, addr);
    return 0;
}

static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    *data = cpu_inw(0, addr);
    return 0;
}

static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    *data = cpu_inl(0, addr);
    return 0;
}
#define PM_IO_BASE 0xb000

static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    if (addr == 0xb2) {
        switch (data) {
        case 0: {
            cpu_outb(0, 0xb3, 0);
            break;
        }
        case 0xf0: {
            unsigned x;

            /* disable acpi */
            x = cpu_inw(0, PM_IO_BASE + 4);
            x &= ~1;
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        case 0xf1: {
            unsigned x;

            /* enable acpi */
            x = cpu_inw(0, PM_IO_BASE + 4);
            x |= 1;
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        default:
            break;
        }
        return 0;
    }
    cpu_outb(0, addr, data);
    return 0;
}
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);
    return 0;
}

static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);
    return 0;
}
int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 0);
    return 0;
}

int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 1);
    return 0;
}
static int handle_unhandled(uint64_t reason)
{
    fprintf(stderr, "kvm: unhandled exit %" PRIx64 "\n", reason);
    return -EINVAL;
}
static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
{
    uint32_t *bitmap = kvm->used_gsi_bitmap;

    if (gsi < kvm->max_gsi)
        bitmap[gsi / 32] |= 1U << (gsi % 32);
    else
        DPRINTF("Invalid GSI %u\n", gsi);
}

static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
{
    uint32_t *bitmap = kvm->used_gsi_bitmap;

    if (gsi < kvm->max_gsi)
        bitmap[gsi / 32] &= ~(1U << (gsi % 32));
    else
        DPRINTF("Invalid GSI %u\n", gsi);
}
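/*
 * Userspace mirror of the VM's memory slots.  KVM only exposes
 * KVM_SET_USER_MEMORY_REGION, so we track each slot's guest physical
 * range, host address, flags and dirty-log refcount here in order to
 * find free slots and toggle KVM_MEM_LOG_DIRTY_PAGES per slot.
 */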
struct slot_info {
    unsigned long phys_addr;
    unsigned long len;
    unsigned long userspace_addr;
    unsigned flags;
    int logging_count;
};

struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];

static void init_slots(void)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        slots[i].len = 0;
}
static int get_free_slot(kvm_context_t kvm)
{
    int i;
    int tss_ext;

#if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
    tss_ext = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
#else
    tss_ext = 0;
#endif

    /*
     * On older kernels where the set tss ioctl is not supported, we must
     * save slot 0 to hold the extended memory, as the vmx will use the
     * last 3 pages of this slot.
     */
    if (tss_ext > 0)
        i = 0;
    else
        i = 1;

    for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        if (!slots[i].len)
            return i;
    return -1;
}
static void register_slot(int slot, unsigned long phys_addr, unsigned long len,
                          unsigned long userspace_addr, unsigned flags)
{
    slots[slot].phys_addr = phys_addr;
    slots[slot].len = len;
    slots[slot].userspace_addr = userspace_addr;
    slots[slot].flags = flags;
}

static void free_slot(int slot)
{
    slots[slot].len = 0;
    slots[slot].logging_count = 0;
}

static int get_slot(unsigned long phys_addr)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
        if (slots[i].len && slots[i].phys_addr <= phys_addr &&
            (slots[i].phys_addr + slots[i].len - 1) >= phys_addr)
            return i;
    }
    return -1;
}
/* Returns -1 if this slot is not totally contained in any other,
 * and the number of the containing slot otherwise */
static int get_container_slot(uint64_t phys_addr, unsigned long size)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        if (slots[i].len && slots[i].phys_addr <= phys_addr &&
            (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
            return i;
    return -1;
}

int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr,
                             unsigned long size)
{
    int slot = get_container_slot(phys_addr, size);
    if (slot == -1)
        return 0;
    return 1;
}
/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(kvm_context_t kvm,
                                      unsigned long phys_addr,
                                      unsigned flags,
                                      unsigned mask)
{
    int r = -1;
    int slot = get_slot(phys_addr);

    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
        return 1;
    }

    flags = (slots[slot].flags & ~mask) | flags;
    if (flags == slots[slot].flags)
        return 0;
    slots[slot].flags = flags;

    {
        struct kvm_userspace_memory_region mem = {
            .slot = slot,
            .memory_size = slots[slot].len,
            .guest_phys_addr = slots[slot].phys_addr,
            .userspace_addr = slots[slot].userspace_addr,
            .flags = slots[slot].flags,
        };

        DPRINTF("slot %d start %llx len %llx flags %x\n",
                mem.slot,
                mem.guest_phys_addr,
                mem.memory_size,
                mem.flags);
        r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &mem);
        if (r < 0)
            fprintf(stderr, "%s: %m\n", __FUNCTION__);
    }
    return r;
}
static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
                                          int (*change)(kvm_context_t kvm,
                                                        uint64_t start,
                                                        uint64_t len))
{
    int i, r;

    for (i = r = 0; i < KVM_MAX_NUM_MEM_REGIONS && r == 0; i++) {
        if (slots[i].len)
            r = change(kvm, slots[i].phys_addr, slots[i].len);
    }
    return r;
}
int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm,
                                    uint64_t phys_addr,
                                    uint64_t len)
{
    int slot = get_slot(phys_addr);

    DPRINTF("start %" PRIx64 " len %" PRIx64 "\n", phys_addr, len);
    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return -EINVAL;
    }

    if (slots[slot].logging_count++)
        return 0;

    return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm,
                                     uint64_t phys_addr,
                                     uint64_t len)
{
    int slot = get_slot(phys_addr);

    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return -EINVAL;
    }

    if (--slots[slot].logging_count)
        return 0;

    return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}
/*
 * Enable dirty page logging for all memory regions
 */
int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
{
    if (kvm->dirty_pages_log_all)
        return 0;
    kvm->dirty_pages_log_all = 1;
    return kvm_dirty_pages_log_change_all(kvm,
                                          kvm_dirty_pages_log_enable_slot);
}

/*
 * Enable dirty page logging only for memory regions that were created with
 * dirty logging enabled (disable for all other memory regions).
 */
int kvm_dirty_pages_log_reset(kvm_context_t kvm)
{
    if (!kvm->dirty_pages_log_all)
        return 0;
    kvm->dirty_pages_log_all = 0;
    return kvm_dirty_pages_log_change_all(kvm,
                                          kvm_dirty_pages_log_disable_slot);
}
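/*
 * One-time open of /dev/kvm: verifies that the kernel speaks the API
 * version we were built against, allocates kvm_state/kvm_context, and
 * sizes the GSI allocation bitmap from KVM_CAP_IRQ_ROUTING.  Returns 0
 * with qemu_mutex held.
 */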
int kvm_init(int smp_cpus)
{
    int fd;
    int r, gsi_count;

    fd = open("/dev/kvm", O_RDWR);
    if (fd == -1) {
        perror("open /dev/kvm");
        return -1;
    }
    r = ioctl(fd, KVM_GET_API_VERSION, 0);
    if (r == -1) {
        fprintf(stderr, "kvm kernel version too old: "
                "KVM_GET_API_VERSION ioctl not supported\n");
        goto out_close;
    }
    if (r < EXPECTED_KVM_API_VERSION) {
        fprintf(stderr, "kvm kernel version too old: "
                "We expect API version %d or newer, but got "
                "version %d\n",
                EXPECTED_KVM_API_VERSION, r);
        goto out_close;
    }
    if (r > EXPECTED_KVM_API_VERSION) {
        fprintf(stderr, "kvm userspace version too old\n");
        goto out_close;
    }
    kvm_abi = r;
    kvm_page_size = getpagesize();
    kvm_state = qemu_mallocz(sizeof(*kvm_state));
    kvm_context = &kvm_state->kvm_context;

    kvm_state->fd = fd;
    kvm_state->vmfd = -1;
    kvm_context->opaque = cpu_single_env;
    kvm_context->dirty_pages_log_all = 0;
    kvm_context->no_irqchip_creation = 0;
    kvm_context->no_pit_creation = 0;

#ifdef KVM_CAP_SET_GUEST_DEBUG
    TAILQ_INIT(&kvm_state->kvm_sw_breakpoints);
#endif

    gsi_count = kvm_get_gsi_count(kvm_context);
    if (gsi_count > 0) {
        int gsi_bits, i;

        /* Round up so we can search ints using ffs */
        gsi_bits = ALIGN(gsi_count, 32);
        kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
        kvm_context->max_gsi = gsi_bits;

        /* Mark any over-allocated bits as already in use */
        for (i = gsi_count; i < gsi_bits; i++)
            set_gsi(kvm_context, i);
    }

    pthread_mutex_lock(&qemu_mutex);
    return 0;

out_close:
    close(fd);
    return -1;
}
static void kvm_finalize(KVMState *s)
{
    /* FIXME
    if (kvm->vcpu_fd[0] != -1)
        close(kvm->vcpu_fd[0]);
    if (kvm->vm_fd != -1)
        close(kvm->vm_fd);
    */
    close(s->fd);
    free(s);
}
void kvm_disable_irqchip_creation(kvm_context_t kvm)
{
    kvm->no_irqchip_creation = 1;
}

void kvm_disable_pit_creation(kvm_context_t kvm)
{
    kvm->no_pit_creation = 1;
}
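/*
 * Creates one in-kernel vcpu (KVM_CREATE_VCPU) and mmaps its shared
 * "struct kvm_run" communication area, through which exit reasons and
 * in-flight I/O data are exchanged with the kernel.
 */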
kvm_vcpu_context_t kvm_create_vcpu(CPUState *env, int id)
{
    long mmap_size;
    int r;
    kvm_vcpu_context_t vcpu_ctx = qemu_malloc(sizeof(struct kvm_vcpu_context));
    kvm_context_t kvm = kvm_context;

    vcpu_ctx->kvm = kvm;
    vcpu_ctx->id = id;

    r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
    if (r < 0) {
        fprintf(stderr, "kvm_create_vcpu: %m\n");
        goto err;
    }
    vcpu_ctx->fd = r;

    env->kvm_fd = r;
    env->kvm_state = kvm_state;

    mmap_size = kvm_ioctl(kvm_state, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        fprintf(stderr, "get vcpu mmap size: %m\n");
        goto err_fd;
    }
    vcpu_ctx->run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED,
                         vcpu_ctx->fd, 0);
    if (vcpu_ctx->run == MAP_FAILED) {
        fprintf(stderr, "mmap vcpu area: %m\n");
        goto err_fd;
    }
    return vcpu_ctx;
err_fd:
    close(vcpu_ctx->fd);
err:
    free(vcpu_ctx);
    return NULL;
}
static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
{
#ifdef KVM_CAP_SET_BOOT_CPU_ID
    int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
    if (r > 0)
        return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id);
    return -ENOSYS;
#else
    return -ENOSYS;
#endif
}
int kvm_create_vm(kvm_context_t kvm)
{
    int fd;

#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes = qemu_mallocz(sizeof(*kvm->irq_routes));
    kvm->nr_allocated_irq_routes = 0;
#endif

    fd = kvm_ioctl(kvm_state, KVM_CREATE_VM, 0);
    if (fd < 0) {
        fprintf(stderr, "kvm_create_vm: %m\n");
        return -1;
    }
    kvm_state->vmfd = fd;
    return 0;
}
static int kvm_create_default_phys_mem(kvm_context_t kvm,
                                       unsigned long phys_mem_bytes,
                                       void **vm_mem)
{
#ifdef KVM_CAP_USER_MEMORY
    int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
    if (r > 0)
        return 0;
    fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
#else
#error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
#endif
    return -1;
}
void kvm_create_irqchip(kvm_context_t kvm)
{
    int r;

    kvm->irqchip_in_kernel = 0;
#ifdef KVM_CAP_IRQCHIP
    if (!kvm->no_irqchip_creation) {
        r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
        if (r > 0) { /* kernel irqchip supported */
            r = kvm_vm_ioctl(kvm_state, KVM_CREATE_IRQCHIP);
            if (r >= 0) {
                kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
#if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
                r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
                              KVM_CAP_IRQ_INJECT_STATUS);
                if (r > 0)
                    kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
#endif
                kvm->irqchip_in_kernel = 1;
            }
            else
                fprintf(stderr, "Create kernel PIC irqchip failed\n");
        }
    }
#endif
}
int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
{
    int r;

    r = kvm_create_vm(kvm);
    if (r < 0)
        return r;
    r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
    if (r < 0)
        return r;
    init_slots();
    r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
    if (r < 0)
        return r;
    kvm_create_irqchip(kvm);

    return 0;
}
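/*
 * Registers a host-userspace-backed slot via KVM_SET_USER_MEMORY_REGION;
 * 'log' requests dirty-page tracking from the start by setting
 * KVM_MEM_LOG_DIRTY_PAGES on the new slot.
 */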
int kvm_register_phys_mem(kvm_context_t kvm,
                          unsigned long phys_start, void *userspace_addr,
                          unsigned long len, int log)
{
    struct kvm_userspace_memory_region memory = {
        .memory_size = len,
        .guest_phys_addr = phys_start,
        .userspace_addr = (unsigned long)(intptr_t)userspace_addr,
        .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
    };
    int r;

    memory.slot = get_free_slot(kvm);
    DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n",
            memory.guest_phys_addr, memory.memory_size,
            memory.userspace_addr, memory.slot, memory.flags);
    r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r < 0) {
        fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(-r));
        return -1;
    }
    register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
                  memory.userspace_addr, memory.flags);
    return 0;
}
/* destroy/free a whole slot.
 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
 */
void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len)
{
    int slot;
    int r;
    struct kvm_userspace_memory_region memory = {
        .memory_size = 0,
        .guest_phys_addr = phys_start,
        .userspace_addr = 0,
        .flags = 0,
    };

    slot = get_slot(phys_start);

    if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
        fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
                __FUNCTION__, slot);
        return;
    }
    if (phys_start != slots[slot].phys_addr) {
        fprintf(stderr,
                "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
                __FUNCTION__, phys_start, slots[slot].phys_addr);
        phys_start = slots[slot].phys_addr;
    }

    memory.slot = slot;
    DPRINTF("slot %d start %llx len %llx flags %x\n",
            memory.slot,
            memory.guest_phys_addr,
            memory.memory_size,
            memory.flags);
    r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r < 0) {
        fprintf(stderr, "destroy_userspace_phys_mem: %s",
                strerror(-r));
        return;
    }

    free_slot(memory.slot);
}
void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr,
                                unsigned long size)
{
    int slot = get_container_slot(phys_addr, size);

    if (slot != -1) {
        DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size);
        kvm_destroy_phys_mem(kvm, phys_addr, size);
        return;
    }
}
static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
{
    int r;
    struct kvm_dirty_log log = {
        .slot = slot,
    };

    log.dirty_bitmap = buf;

    r = kvm_vm_ioctl(kvm_state, ioctl_num, &log);
    if (r < 0)
        return r;
    return 0;
}

int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
{
    int slot;

    slot = get_slot(phys_addr);
    return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
}
int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
                              unsigned long len, void *opaque,
                              int (*cb)(unsigned long start, unsigned long len,
                                        void *bitmap, void *opaque))
{
    int i;
    int r;
    unsigned long end_addr = phys_addr + len;
    void *buf;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
        if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr)
            && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) {
            buf = qemu_malloc((slots[i].len / 4096 + 7) / 8 + 2);
            r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
            if (r) {
                qemu_free(buf);
                return r;
            }
            r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
            qemu_free(buf);
            if (r)
                return r;
        }
    }
    return 0;
}
#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
{
    struct kvm_irq_level event;
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    event.level = level;
    event.irq = irq;
    r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event);
    if (r < 0)
        perror("kvm_set_irq_level");

    if (status) {
#ifdef KVM_CAP_IRQ_INJECT_STATUS
        *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ?
            1 : event.status;
#else
        *status = 1;
#endif
    }

    return 1;
}

int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
    if (r < 0) {
        perror("kvm_get_irqchip");
    }
    return r;
}

int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
    if (r < 0) {
        perror("kvm_set_irqchip");
    }
    return r;
}

#endif
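/*
 * KVM_EXIT_IO handler: the kernel batches string I/O, so walk all
 * run->io.count elements in the data area at run->io.data_offset and
 * feed each one to the kvm_inX()/kvm_outX() helpers above.
 */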
static int handle_io(kvm_vcpu_context_t vcpu)
{
    struct kvm_run *run = vcpu->run;
    kvm_context_t kvm = vcpu->kvm;
    uint16_t addr = run->io.port;
    int r;
    int i;
    void *p = (void *)run + run->io.data_offset;

    for (i = 0; i < run->io.count; ++i) {
        switch (run->io.direction) {
        case KVM_EXIT_IO_IN:
            switch (run->io.size) {
            case 1:
                r = kvm_inb(kvm->opaque, addr, p);
                break;
            case 2:
                r = kvm_inw(kvm->opaque, addr, p);
                break;
            case 4:
                r = kvm_inl(kvm->opaque, addr, p);
                break;
            default:
                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                return -EMSGSIZE;
            }
            break;
        case KVM_EXIT_IO_OUT:
            switch (run->io.size) {
            case 1:
                r = kvm_outb(kvm->opaque, addr,
                             *(uint8_t *)p);
                break;
            case 2:
                r = kvm_outw(kvm->opaque, addr,
                             *(uint16_t *)p);
                break;
            case 4:
                r = kvm_outl(kvm->opaque, addr,
                             *(uint32_t *)p);
                break;
            default:
                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                return -EMSGSIZE;
            }
            break;
        default:
            fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
            return -EPROTO;
        }

        p += run->io.size;
    }

    return 0;
}
int handle_debug(kvm_vcpu_context_t vcpu, void *env)
{
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_run *run = vcpu->run;
    kvm_context_t kvm = vcpu->kvm;

    return kvm_debug(kvm->opaque, env, &run->debug.arch);
#else
    return 0;
#endif
}
int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
{
    return ioctl(vcpu->fd, KVM_GET_REGS, regs);
}

int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
{
    return ioctl(vcpu->fd, KVM_SET_REGS, regs);
}

int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
{
    return ioctl(vcpu->fd, KVM_GET_FPU, fpu);
}

int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
{
    return ioctl(vcpu->fd, KVM_SET_FPU, fpu);
}

int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
{
    return ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
}

int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
{
    return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
}
#ifdef KVM_CAP_MP_STATE
int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
{
    int r;

    r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
    if (r > 0)
        return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state);
    return -ENOSYS;
}

int kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
{
    int r;

    r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
    if (r > 0)
        return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
    return -ENOSYS;
}
#endif
static int handle_mmio(kvm_vcpu_context_t vcpu)
{
    unsigned long addr = vcpu->run->mmio.phys_addr;
    kvm_context_t kvm = vcpu->kvm;
    struct kvm_run *kvm_run = vcpu->run;
    void *data = kvm_run->mmio.data;

    /* hack: Red Hat 7.1 generates these weird accesses. */
    if ((addr > 0xa0000 - 4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
        return 0;

    if (kvm_run->mmio.is_write)
        return kvm_mmio_write(kvm->opaque, addr, data,
                              kvm_run->mmio.len);
    else
        return kvm_mmio_read(kvm->opaque, addr, data,
                             kvm_run->mmio.len);
}
int handle_io_window(kvm_context_t kvm)
{
    return 1;
}

int handle_halt(kvm_vcpu_context_t vcpu)
{
    return kvm_arch_halt(vcpu->kvm->opaque, vcpu);
}

int handle_shutdown(kvm_context_t kvm, CPUState *env)
{
    /* stop the current vcpu from going back to guest mode */
    env->kvm_cpu_state.stopped = 1;

    qemu_system_reset_request();
    return 1;
}

static inline void push_nmi(kvm_context_t kvm)
{
#ifdef KVM_CAP_USER_NMI
    kvm_arch_push_nmi(kvm->opaque);
#endif /* KVM_CAP_USER_NMI */
}
void post_kvm_run(kvm_context_t kvm, CPUState *env)
{
    pthread_mutex_lock(&qemu_mutex);
    kvm_arch_post_kvm_run(kvm->opaque, env);
}

int pre_kvm_run(kvm_context_t kvm, CPUState *env)
{
    kvm_arch_pre_kvm_run(kvm->opaque, env);

    if (env->exit_request)
        return 1;
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}

int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu)
{
    return vcpu->run->if_flag;
}

int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu)
{
    return vcpu->run->ready_for_interrupt_injection;
}
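/*
 * Core vcpu loop: optionally request an interrupt window (when the
 * irqchip lives in userspace), drop qemu_mutex around the blocking
 * KVM_RUN ioctl, drain any coalesced-MMIO ring entries, then dispatch
 * on run->exit_reason.  Loops until a handler returns nonzero.
 */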
int kvm_run(kvm_vcpu_context_t vcpu, void *env)
{
    int r;
    int fd = vcpu->fd;
    struct kvm_run *run = vcpu->run;
    kvm_context_t kvm = vcpu->kvm;

again:
    push_nmi(kvm);
#if !defined(__s390__)
    if (!kvm->irqchip_in_kernel)
        run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
#endif
    r = pre_kvm_run(kvm, env);
    if (r)
        return r;
    r = ioctl(fd, KVM_RUN, 0);

    if (r == -1 && errno != EINTR && errno != EAGAIN) {
        r = -errno;
        post_kvm_run(kvm, env);
        fprintf(stderr, "kvm_run: %s\n", strerror(-r));
        return r;
    }

    post_kvm_run(kvm, env);

#if defined(KVM_CAP_COALESCED_MMIO)
    if (kvm->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring = (void *)run +
            kvm->coalesced_mmio * PAGE_SIZE;
        while (ring->first != ring->last) {
            kvm_mmio_write(kvm->opaque,
                           ring->coalesced_mmio[ring->first].phys_addr,
                           &ring->coalesced_mmio[ring->first].data[0],
                           ring->coalesced_mmio[ring->first].len);
            smp_wmb();
            ring->first = (ring->first + 1) %
                KVM_COALESCED_MMIO_MAX;
        }
    }
#endif

#if !defined(__s390__)
    if (r == -1) {
        r = handle_io_window(kvm);
        goto more;
    }
#endif
    if (1) {
        switch (run->exit_reason) {
        case KVM_EXIT_UNKNOWN:
            r = handle_unhandled(run->hw.hardware_exit_reason);
            break;
        case KVM_EXIT_FAIL_ENTRY:
            r = handle_unhandled(run->fail_entry.hardware_entry_failure_reason);
            break;
        case KVM_EXIT_EXCEPTION:
            fprintf(stderr, "exception %d (%x)\n",
                    run->ex.exception,
                    run->ex.error_code);
            kvm_show_regs(vcpu);
            kvm_show_code(vcpu);
            abort();
            break;
        case KVM_EXIT_IO:
            r = handle_io(vcpu);
            break;
        case KVM_EXIT_DEBUG:
            r = handle_debug(vcpu, env);
            break;
        case KVM_EXIT_MMIO:
            r = handle_mmio(vcpu);
            break;
        case KVM_EXIT_HLT:
            r = handle_halt(vcpu);
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            break;
        case KVM_EXIT_SHUTDOWN:
            r = handle_shutdown(kvm, env);
            break;
#if defined(__s390__)
        case KVM_EXIT_S390_SIEIC:
            r = kvm_s390_handle_intercept(kvm, vcpu,
                                          run);
            break;
        case KVM_EXIT_S390_RESET:
            r = kvm_s390_handle_reset(kvm, vcpu, run);
            break;
#endif
        default:
            if (kvm_arch_run(vcpu)) {
                fprintf(stderr, "unhandled vm exit: 0x%x\n",
                        run->exit_reason);
                kvm_show_regs(vcpu);
                abort();
            }
            break;
        }
    }
more:
    if (!r)
        goto again;
    return r;
}
int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq)
{
    struct kvm_interrupt intr;

    intr.irq = irq;
    return ioctl(vcpu->fd, KVM_INTERRUPT, &intr);
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg)
{
    return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg);
}
#endif
int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset) {
        r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL);
        if (r == -1)
            r = -errno;
        return r;
    }
    sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));

    sigmask->len = 8;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask);
    if (r == -1)
        r = -errno;
    free(sigmask);
    return r;
}
int kvm_irqchip_in_kernel(kvm_context_t kvm)
{
    return kvm->irqchip_in_kernel;
}

int kvm_pit_in_kernel(kvm_context_t kvm)
{
    return kvm->pit_in_kernel;
}

int kvm_has_sync_mmu(void)
{
    int r = 0;
#ifdef KVM_CAP_SYNC_MMU
    r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
#endif
    return r;
}

int kvm_inject_nmi(kvm_vcpu_context_t vcpu)
{
#ifdef KVM_CAP_USER_NMI
    return ioctl(vcpu->fd, KVM_NMI);
#else
    return -ENOSYS;
#endif
}
int kvm_init_coalesced_mmio(kvm_context_t kvm)
{
    int r = 0;

    kvm->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
    if (r > 0) {
        kvm->coalesced_mmio = r;
        return 0;
    }
#endif
    return r;
}
int kvm_coalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    kvm_context_t kvm = kvm_context;
    struct kvm_coalesced_mmio_zone zone;
    int r;

    if (kvm->coalesced_mmio) {
        zone.addr = addr;
        zone.size = size;

        r = kvm_vm_ioctl(kvm_state, KVM_REGISTER_COALESCED_MMIO, &zone);
        if (r < 0) {
            perror("kvm_register_coalesced_mmio_zone");
            return r;
        }
        return 0;
    }
#endif
    return -ENOSYS;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    kvm_context_t kvm = kvm_context;
    struct kvm_coalesced_mmio_zone zone;
    int r;

    if (kvm->coalesced_mmio) {
        zone.addr = addr;
        zone.size = size;

        r = kvm_vm_ioctl(kvm_state, KVM_UNREGISTER_COALESCED_MMIO, &zone);
        if (r < 0) {
            perror("kvm_unregister_coalesced_mmio_zone");
            return r;
        }
        DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n",
                addr, size);
        return 0;
    }
#endif
    return -ENOSYS;
}
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
int kvm_assign_pci_device(kvm_context_t kvm,
                          struct kvm_assigned_pci_dev *assigned_dev)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
}

static int kvm_old_assign_irq(kvm_context_t kvm,
                              struct kvm_assigned_irq *assigned_irq)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
}

#ifdef KVM_CAP_ASSIGN_DEV_IRQ
int kvm_assign_irq(kvm_context_t kvm,
                   struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
    if (ret > 0) {
        return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq);
    }

    return kvm_old_assign_irq(kvm, assigned_irq);
}

int kvm_deassign_irq(kvm_context_t kvm,
                     struct kvm_assigned_irq *assigned_irq)
{
    return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
}
#else
int kvm_assign_irq(kvm_context_t kvm,
                   struct kvm_assigned_irq *assigned_irq)
{
    return kvm_old_assign_irq(kvm, assigned_irq);
}
#endif
#endif
#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
int kvm_deassign_pci_device(kvm_context_t kvm,
                            struct kvm_assigned_pci_dev *assigned_dev)
{
    return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
}
#endif

int kvm_destroy_memory_region_works(kvm_context_t kvm)
{
    int ret = 0;

#ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
    ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
                    KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
    if (ret <= 0)
        ret = 0;
#endif
    return ret;
}
int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
{
#ifdef KVM_CAP_REINJECT_CONTROL
    int r;
    struct kvm_reinject_control control;

    control.pit_reinject = pit_reinject;

    r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
    if (r > 0) {
        return kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control);
    }
#endif
    return -ENOSYS;
}
int kvm_has_gsi_routing(kvm_context_t kvm)
{
    int r = 0;

#ifdef KVM_CAP_IRQ_ROUTING
    r = kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
#endif
    return r;
}

int kvm_get_gsi_count(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
#else
    return -EINVAL;
#endif
}

int kvm_clear_gsi_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes->nr = 0;
    return 0;
#else
    return -EINVAL;
#endif
}
int kvm_add_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing *z;
    struct kvm_irq_routing_entry *new;
    int n, size;

    if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
        n = kvm->nr_allocated_irq_routes * 2;
        if (n < 64)
            n = 64;
        size = sizeof(struct kvm_irq_routing);
        size += n * sizeof(*new);
        z = realloc(kvm->irq_routes, size);
        if (!z)
            return -ENOMEM;
        kvm->nr_allocated_irq_routes = n;
        kvm->irq_routes = z;
    }
    n = kvm->irq_routes->nr++;
    new = &kvm->irq_routes->entries[n];
    memset(new, 0, sizeof(*new));
    new->gsi = entry->gsi;
    new->type = entry->type;
    new->flags = entry->flags;
    new->u = entry->u;

    set_gsi(kvm, entry->gsi);

    return 0;
#else
    return -ENOSYS;
#endif
}
int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_add_routing_entry(kvm, &e);
#else
    return -ENOSYS;
#endif
}
int kvm_del_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry *e, *p;
    int i, gsi, found = 0;

    gsi = entry->gsi;

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        e = &kvm->irq_routes->entries[i];
        if (e->type == entry->type
            && e->gsi == gsi) {
            switch (e->type) {
            case KVM_IRQ_ROUTING_IRQCHIP: {
                if (e->u.irqchip.irqchip ==
                    entry->u.irqchip.irqchip
                    && e->u.irqchip.pin ==
                    entry->u.irqchip.pin) {
                    p = &kvm->irq_routes->
                        entries[--kvm->irq_routes->nr];
                    *e = *p;
                    found = 1;
                }
                break;
            }
            case KVM_IRQ_ROUTING_MSI: {
                if (e->u.msi.address_lo ==
                    entry->u.msi.address_lo
                    && e->u.msi.address_hi ==
                    entry->u.msi.address_hi
                    && e->u.msi.data == entry->u.msi.data) {
                    p = &kvm->irq_routes->
                        entries[--kvm->irq_routes->nr];
                    *e = *p;
                    found = 1;
                }
                break;
            }
            default:
                break;
            }
            if (found) {
                /* If there are no other users of this GSI
                 * mark it available in the bitmap */
                for (i = 0; i < kvm->irq_routes->nr; i++) {
                    e = &kvm->irq_routes->entries[i];
                    if (e->gsi == gsi)
                        break;
                }
                if (i == kvm->irq_routes->nr)
                    clear_gsi(kvm, gsi);

                return 0;
            }
        }
    }
    return -ESRCH;
#else
    return -ENOSYS;
#endif
}
int kvm_update_routing_entry(kvm_context_t kvm,
                             struct kvm_irq_routing_entry *entry,
                             struct kvm_irq_routing_entry *newentry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry *e;
    int i;

    if (entry->gsi != newentry->gsi ||
        entry->type != newentry->type) {
        return -EINVAL;
    }

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        e = &kvm->irq_routes->entries[i];
        if (e->type != entry->type || e->gsi != entry->gsi) {
            continue;
        }
        switch (e->type) {
        case KVM_IRQ_ROUTING_IRQCHIP:
            if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
                e->u.irqchip.pin == entry->u.irqchip.pin) {
                memcpy(&e->u.irqchip, &newentry->u.irqchip,
                       sizeof e->u.irqchip);
                return 0;
            }
            break;
        case KVM_IRQ_ROUTING_MSI:
            if (e->u.msi.address_lo == entry->u.msi.address_lo &&
                e->u.msi.address_hi == entry->u.msi.address_hi &&
                e->u.msi.data == entry->u.msi.data) {
                memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
                return 0;
            }
            break;
        default:
            break;
        }
    }
    return -ESRCH;
#else
    return -ENOSYS;
#endif
}
int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_del_routing_entry(kvm, &e);
#else
    return -ENOSYS;
#endif
}

int kvm_commit_irq_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes->flags = 0;
    return kvm_vm_ioctl(kvm_state, KVM_SET_GSI_ROUTING, kvm->irq_routes);
#else
    return -ENOSYS;
#endif
}
int kvm_get_irq_route_gsi(kvm_context_t kvm)
{
    int i, bit;
    uint32_t *buf = kvm->used_gsi_bitmap;

    /* Return the lowest unused GSI in the bitmap */
    for (i = 0; i < kvm->max_gsi / 32; i++) {
        bit = ffs(~buf[i]);
        if (!bit)
            continue;

        return bit - 1 + i * 32;
    }

    return -ENOSPC;
}
#ifdef KVM_CAP_DEVICE_MSIX
int kvm_assign_set_msix_nr(kvm_context_t kvm,
                           struct kvm_assigned_msix_nr *msix_nr)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
}

int kvm_assign_set_msix_entry(kvm_context_t kvm,
                              struct kvm_assigned_msix_entry *entry)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
}
#endif
#if defined(KVM_CAP_IRQFD) && defined(CONFIG_eventfd)

#include <sys/eventfd.h>

static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
{
    struct kvm_irqfd data = {
        .fd = fd,
        .gsi = gsi,
        .flags = flags,
    };

    return kvm_vm_ioctl(kvm_state, KVM_IRQFD, &data);
}

int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
{
    int r;
    int fd;

    if (!kvm_check_extension(kvm_state, KVM_CAP_IRQFD))
        return -ENOENT;

    fd = eventfd(0, 0);
    if (fd < 0)
        return -errno;

    r = _kvm_irqfd(kvm, fd, gsi, 0);
    if (r < 0) {
        close(fd);
        return -errno;
    }

    return fd;
}

#else /* KVM_CAP_IRQFD */

int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
{
    return -ENOSYS;
}

#endif /* KVM_CAP_IRQFD */
static inline unsigned long kvm_get_thread_id(void)
{
    return syscall(SYS_gettid);
}

static void qemu_cond_wait(pthread_cond_t *cond)
{
    CPUState *env = cpu_single_env;
    static const struct timespec ts = {
        .tv_sec = 0,
        .tv_nsec = 100000,
    };

    pthread_cond_timedwait(cond, &qemu_mutex, &ts);
    cpu_single_env = env;
}

static void sig_ipi_handler(int n)
{
}
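/*
 * Runs func(data) on the vcpu thread that owns 'env'.  If called from
 * another thread, the work item is appended to the vcpu's queue, the
 * vcpu is kicked with SIG_IPI, and we block on qemu_work_cond until
 * flush_queued_work() marks the item done.
 */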
static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (env == current_env) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    if (!env->kvm_cpu_state.queued_work_first)
        env->kvm_cpu_state.queued_work_first = &wi;
    else
        env->kvm_cpu_state.queued_work_last->next = &wi;
    env->kvm_cpu_state.queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
    while (!wi.done)
        qemu_cond_wait(&qemu_work_cond);
}
static void inject_interrupt(void *data)
{
    cpu_interrupt(current_env, (long)data);
}

void kvm_inject_interrupt(CPUState *env, int mask)
{
    on_vcpu(env, inject_interrupt, (void *)(long)mask);
}

void kvm_update_interrupt_request(CPUState *env)
{
    int signal = 0;

    if (env) {
        if (!current_env || !current_env->kvm_cpu_state.created)
            signal = 1;
        /*
         * Testing for created here is really redundant
         */
        if (current_env && current_env->kvm_cpu_state.created &&
            env != current_env && !env->kvm_cpu_state.signalled)
            signal = 1;

        if (signal) {
            env->kvm_cpu_state.signalled = 1;
            if (env->kvm_cpu_state.thread)
                pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
        }
    }
}
static void kvm_do_load_registers(void *_env)
{
    CPUState *env = _env;

    kvm_arch_load_regs(env);
}

void kvm_load_registers(CPUState *env)
{
    if (kvm_enabled() && qemu_system_ready)
        on_vcpu(env, kvm_do_load_registers, env);
}

static void kvm_do_save_registers(void *_env)
{
    CPUState *env = _env;

    kvm_arch_save_regs(env);
}

void kvm_save_registers(CPUState *env)
{
    if (kvm_enabled())
        on_vcpu(env, kvm_do_save_registers, env);
}

static void kvm_do_load_mpstate(void *_env)
{
    CPUState *env = _env;

    kvm_arch_load_mpstate(env);
}

void kvm_load_mpstate(CPUState *env)
{
    if (kvm_enabled() && qemu_system_ready)
        on_vcpu(env, kvm_do_load_mpstate, env);
}

static void kvm_do_save_mpstate(void *_env)
{
    CPUState *env = _env;

    kvm_arch_save_mpstate(env);
    env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
}

void kvm_save_mpstate(CPUState *env)
{
    if (kvm_enabled())
        on_vcpu(env, kvm_do_save_mpstate, env);
}
int kvm_cpu_exec(CPUState *env)
{
    int r;

    r = kvm_run(env->kvm_cpu_state.vcpu_ctx, env);
    if (r < 0) {
        printf("kvm_run returned %d\n", r);
        vm_stop(0);
    }

    return 0;
}

static int is_cpu_stopped(CPUState *env)
{
    return !vm_running || env->kvm_cpu_state.stopped;
}
static void flush_queued_work(CPUState *env)
{
    struct qemu_work_item *wi;

    if (!env->kvm_cpu_state.queued_work_first)
        return;

    while ((wi = env->kvm_cpu_state.queued_work_first)) {
        env->kvm_cpu_state.queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
    }
    env->kvm_cpu_state.queued_work_last = NULL;
    pthread_cond_broadcast(&qemu_work_cond);
}
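/*
 * Idle/housekeeping point for a vcpu thread: drops qemu_mutex while it
 * sigtimedwait()s for SIG_IPI (up to 'timeout' ms), then retakes the
 * mutex, flushes queued work, and acknowledges any stop request.
 */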
static void kvm_main_loop_wait(CPUState *env, int timeout)
{
    struct timespec ts;
    int r, e;
    siginfo_t siginfo;
    sigset_t waitset;

    pthread_mutex_unlock(&qemu_mutex);

    ts.tv_sec = timeout / 1000;
    ts.tv_nsec = (timeout % 1000) * 1000000;
    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    r = sigtimedwait(&waitset, &siginfo, &ts);
    e = errno;

    pthread_mutex_lock(&qemu_mutex);

    if (r == -1 && !(e == EAGAIN || e == EINTR)) {
        printf("sigtimedwait: %s\n", strerror(e));
        exit(1);
    }

    cpu_single_env = env;
    flush_queued_work(env);

    if (env->kvm_cpu_state.stop) {
        env->kvm_cpu_state.stop = 0;
        env->kvm_cpu_state.stopped = 1;
        pthread_cond_signal(&qemu_pause_cond);
    }

    env->kvm_cpu_state.signalled = 0;
}
static int all_threads_paused(void)
{
    CPUState *penv = first_cpu;

    while (penv) {
        if (penv->kvm_cpu_state.stop)
            return 0;
        penv = (CPUState *)penv->next_cpu;
    }

    return 1;
}

static void pause_all_threads(void)
{
    CPUState *penv = first_cpu;

    while (penv) {
        if (penv != cpu_single_env) {
            penv->kvm_cpu_state.stop = 1;
            pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
        } else {
            penv->kvm_cpu_state.stop = 0;
            penv->kvm_cpu_state.stopped = 1;
            cpu_exit(penv);
        }
        penv = (CPUState *)penv->next_cpu;
    }

    while (!all_threads_paused())
        qemu_cond_wait(&qemu_pause_cond);
}

static void resume_all_threads(void)
{
    CPUState *penv = first_cpu;

    assert(!cpu_single_env);

    while (penv) {
        penv->kvm_cpu_state.stop = 0;
        penv->kvm_cpu_state.stopped = 0;
        pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
        penv = (CPUState *)penv->next_cpu;
    }
}
static void kvm_vm_state_change_handler(void *context, int running, int reason)
{
    if (running)
        resume_all_threads();
    else
        pause_all_threads();
}

static void setup_kernel_sigmask(CPUState *env)
{
    sigset_t set;

    sigemptyset(&set);
    sigaddset(&set, SIGUSR2);
    sigaddset(&set, SIGIO);
    sigaddset(&set, SIGALRM);
    sigprocmask(SIG_BLOCK, &set, NULL);

    sigprocmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    kvm_set_signal_mask(env->kvm_cpu_state.vcpu_ctx, &set);
}
static void qemu_kvm_system_reset(void)
{
    CPUState *penv = first_cpu;

    pause_all_threads();

    qemu_system_reset();

    while (penv) {
        kvm_arch_cpu_reset(penv);
        penv = (CPUState *)penv->next_cpu;
    }

    resume_all_threads();
}

static void process_irqchip_events(CPUState *env)
{
    kvm_arch_process_irqchip_events(env);
    if (kvm_arch_has_work(env))
        env->halted = 0;
}
static int kvm_main_loop_cpu(CPUState *env)
{
    setup_kernel_sigmask(env);

    pthread_mutex_lock(&qemu_mutex);

    kvm_qemu_init_env(env);
#ifdef TARGET_I386
    kvm_tpr_vcpu_start(env);
#endif

    cpu_single_env = env;
    kvm_arch_load_regs(env);

    while (1) {
        int run_cpu = !is_cpu_stopped(env);
        if (run_cpu && !kvm_irqchip_in_kernel(kvm_context)) {
            process_irqchip_events(env);
            run_cpu = !env->halted;
        }
        if (run_cpu) {
            kvm_main_loop_wait(env, 0);
            kvm_cpu_exec(env);
        } else {
            kvm_main_loop_wait(env, 1000);
        }
    }
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}
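/*
 * Per-vcpu thread entry point: blocks all signals (delivery during
 * KVM_RUN is governed by KVM_SET_SIGNAL_MASK via setup_kernel_sigmask),
 * creates the in-kernel vcpu, signals qemu_vcpu_cond so kvm_init_vcpu()
 * can return, waits for machine init, then enters kvm_main_loop_cpu().
 */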
static void *ap_main_loop(void *_env)
{
    CPUState *env = _env;
    sigset_t signals;
    struct ioperm_data *data = NULL;

    current_env = env;
    env->thread_id = kvm_get_thread_id();
    sigfillset(&signals);
    sigprocmask(SIG_BLOCK, &signals, NULL);
    env->kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(env, env->cpu_index);

#ifdef USE_KVM_DEVICE_ASSIGNMENT
    /* do ioperm for io ports of assigned devices */
    LIST_FOREACH(data, &ioperm_head, entries)
        on_vcpu(env, kvm_arch_do_ioperm, data);
#endif

    /* signal VCPU creation */
    pthread_mutex_lock(&qemu_mutex);
    current_env->kvm_cpu_state.created = 1;
    pthread_cond_signal(&qemu_vcpu_cond);

    /* and wait for machine initialization */
    while (!qemu_system_ready)
        qemu_cond_wait(&qemu_system_cond);
    pthread_mutex_unlock(&qemu_mutex);

    kvm_main_loop_cpu(env);
    return NULL;
}
void kvm_init_vcpu(CPUState *env)
{
    pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);

    while (env->kvm_cpu_state.created == 0)
        qemu_cond_wait(&qemu_vcpu_cond);
}

int kvm_vcpu_inited(CPUState *env)
{
    return env->kvm_cpu_state.created;
}
#ifdef TARGET_I386
void kvm_hpet_disable_kpit(void)
{
    struct kvm_pit_state2 ps2;

    kvm_get_pit2(kvm_context, &ps2);
    ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
    kvm_set_pit2(kvm_context, &ps2);
}

void kvm_hpet_enable_kpit(void)
{
    struct kvm_pit_state2 ps2;

    kvm_get_pit2(kvm_context, &ps2);
    ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY;
    kvm_set_pit2(kvm_context, &ps2);
}
#endif
int kvm_init_ap(void)
{
#ifdef TARGET_I386
    kvm_tpr_opt_setup();
#endif
    qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);

    signal(SIG_IPI, sig_ipi_handler);
    return 0;
}
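/*
 * Wakes the io thread out of its select(): writes an 8-byte counter
 * value to io_thread_fd (an eventfd, or the write end of a pipe), which
 * io_thread_wakeup() later drains.
 */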
void qemu_kvm_notify_work(void)
{
    uint64_t value = 1;
    char buffer[8];
    size_t offset = 0;

    if (io_thread_fd == -1)
        return;

    memcpy(buffer, &value, sizeof(value));

    while (offset < 8) {
        ssize_t len;

        len = write(io_thread_fd, buffer + offset, 8 - offset);
        if (len == -1 && errno == EINTR)
            continue;

        /* In case we have a pipe, there is no reason to insist on
         * writing all 8 bytes
         */
        if (len == -1 && errno == EAGAIN)
            break;

        if (len <= 0)
            break;

        offset += len;
    }
}
/* If we have signalfd, we mask out the signals we want to handle and then
 * use signalfd to listen for them.  We rely on whatever the current signal
 * handler is to dispatch the signals when we receive them.
 */
static void sigfd_handler(void *opaque)
{
    int fd = (unsigned long)opaque;
    struct qemu_signalfd_siginfo info;
    struct sigaction action;
    ssize_t len;

    while (1) {
        do {
            len = read(fd, &info, sizeof(info));
        } while (len == -1 && errno == EINTR);

        if (len == -1 && errno == EAGAIN)
            break;

        if (len != sizeof(info)) {
            printf("read from sigfd returned %zd: %m\n", len);
            return;
        }

        sigaction(info.ssi_signo, NULL, &action);
        if (action.sa_handler)
            action.sa_handler(info.ssi_signo);
    }
}
/* Used to break IO thread out of select */
static void io_thread_wakeup(void *opaque)
{
    int fd = (unsigned long)opaque;
    char buffer[4096];

    /* Drain the pipe/(eventfd) */
    while (1) {
        ssize_t len;

        len = read(fd, buffer, sizeof(buffer));
        if (len == -1 && errno == EINTR)
            continue;

        if (len <= 0)
            break;
    }
}
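/*
 * The io (main) thread: sets up the wakeup eventfd/pipe and the
 * signalfd for SIGIO/SIGALRM, broadcasts qemu_system_cond so vcpu
 * threads may start, then loops in main_loop_wait() dispatching
 * shutdown, powerdown, reset and guest-debug requests.
 */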
int kvm_main_loop(void)
{
    int fds[2];
    sigset_t mask;
    int sigfd;

    io_thread = pthread_self();
    qemu_system_ready = 1;

    if (qemu_eventfd(fds) == -1) {
        fprintf(stderr, "failed to create eventfd\n");
        return -errno;
    }

    fcntl(fds[0], F_SETFL, O_NONBLOCK);
    fcntl(fds[1], F_SETFL, O_NONBLOCK);

    qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
                         (void *)(unsigned long)fds[0]);

    io_thread_fd = fds[1];

    sigemptyset(&mask);
    sigaddset(&mask, SIGIO);
    sigaddset(&mask, SIGALRM);
    sigprocmask(SIG_BLOCK, &mask, NULL);

    sigfd = qemu_signalfd(&mask);
    if (sigfd == -1) {
        fprintf(stderr, "failed to create signalfd\n");
        return -errno;
    }

    fcntl(sigfd, F_SETFL, O_NONBLOCK);

    qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
                         (void *)(unsigned long)sigfd);

    pthread_cond_broadcast(&qemu_system_cond);

    io_thread_sigfd = sigfd;
    cpu_single_env = NULL;

    while (1) {
        main_loop_wait(1000);
        if (qemu_shutdown_requested()) {
            if (qemu_no_shutdown()) {
                vm_stop(0);
            } else
                break;
        } else if (qemu_powerdown_requested())
            qemu_system_powerdown();
        else if (qemu_reset_requested())
            qemu_kvm_system_reset();
        else if (kvm_debug_cpu_requested) {
            gdb_set_stop_cpu(kvm_debug_cpu_requested);
            vm_stop(EXCP_DEBUG);
            kvm_debug_cpu_requested = NULL;
        }
    }

    pause_all_threads();
    pthread_mutex_unlock(&qemu_mutex);

    return 0;
}
#ifdef TARGET_I386
static int destroy_region_works = 0;
#endif

#if !defined(TARGET_I386)
int kvm_arch_init_irq_routing(void)
{
    return 0;
}
#endif
int kvm_qemu_create_context(void)
{
    int r;

    if (!kvm_irqchip) {
        kvm_disable_irqchip_creation(kvm_context);
    }
    if (!kvm_pit) {
        kvm_disable_pit_creation(kvm_context);
    }
    if (kvm_create(kvm_context, 0, NULL) < 0) {
        kvm_finalize(kvm_state);
        return -1;
    }
    r = kvm_arch_qemu_create_context();
    if (r < 0)
        kvm_finalize(kvm_state);
    if (kvm_pit && !kvm_pit_reinject) {
        if (kvm_reinject_control(kvm_context, 0)) {
            fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
            return -1;
        }
    }
#ifdef TARGET_I386
    destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
#endif

    r = kvm_arch_init_irq_routing();
    if (r < 0) {
        return r;
    }

    return 0;
}
#ifdef TARGET_I386
static int must_use_aliases_source(target_phys_addr_t addr)
{
    if (destroy_region_works)
        return false;
    if (addr == 0xa0000 || addr == 0xa8000)
        return true;
    return false;
}

static int must_use_aliases_target(target_phys_addr_t addr)
{
    if (destroy_region_works)
        return false;
    if (addr >= 0xe0000000 && addr < 0x100000000ull)
        return true;
    return false;
}

static struct mapping {
    target_phys_addr_t phys;
    ram_addr_t ram;
    ram_addr_t len;
} mappings[50];
static int nr_mappings;

static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
{
    struct mapping *p;

    for (p = mappings; p < mappings + nr_mappings; ++p) {
        if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
            return p;
        }
    }
    return NULL;
}

static struct mapping *find_mapping(target_phys_addr_t start_addr)
{
    struct mapping *p;

    for (p = mappings; p < mappings + nr_mappings; ++p) {
        if (p->phys <= start_addr && start_addr < p->phys + p->len) {
            return p;
        }
    }
    return NULL;
}

static void drop_mapping(target_phys_addr_t start_addr)
{
    struct mapping *p = find_mapping(start_addr);

    if (p)
        *p = mappings[--nr_mappings];
}
#endif
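/*
 * Bridges qemu's physical memory registration to KVM slots: RAM-backed
 * ranges become user memory slots, non-RAM (ROM/MMIO) registrations
 * tear existing slots down page by page, and on i386 the VGA/BIOS
 * ranges fall back to memory aliases when slot destruction is broken
 * in the kernel (see must_use_aliases_source/target above).
 */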
void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
                      ram_addr_t phys_offset)
{
    int r = 0;
    unsigned long area_flags;
#ifdef TARGET_I386
    struct mapping *p;
#endif

    if (start_addr + size > phys_ram_size) {
        phys_ram_size = start_addr + size;
    }

    phys_offset &= ~IO_MEM_ROM;
    area_flags = phys_offset & ~TARGET_PAGE_MASK;

    if (area_flags != IO_MEM_RAM) {
#ifdef TARGET_I386
        if (must_use_aliases_source(start_addr)) {
            kvm_destroy_memory_alias(kvm_context, start_addr);
            return;
        }
        if (must_use_aliases_target(start_addr))
            return;
#endif
        while (size > 0) {
            p = find_mapping(start_addr);
            if (p) {
                kvm_unregister_memory_area(kvm_context, p->phys, p->len);
                drop_mapping(p->phys);
            }
            start_addr += TARGET_PAGE_SIZE;
            if (size > TARGET_PAGE_SIZE) {
                size -= TARGET_PAGE_SIZE;
            } else {
                size = 0;
            }
        }
        return;
    }

    r = kvm_is_containing_region(kvm_context, start_addr, size);
    if (r)
        return;

    if (area_flags >= TLB_MMIO)
        return;

#ifdef TARGET_I386
    if (must_use_aliases_source(start_addr)) {
        p = find_ram_mapping(phys_offset);
        if (p) {
            kvm_create_memory_alias(kvm_context, start_addr, size,
                                    p->phys + (phys_offset - p->ram));
        }
        return;
    }
#endif

    r = kvm_register_phys_mem(kvm_context, start_addr,
                              qemu_get_ram_ptr(phys_offset),
                              size, 0);
    if (r < 0) {
        printf("kvm_cpu_register_physical_memory: failed\n");
        exit(1);
    }

#ifdef TARGET_I386
    drop_mapping(start_addr);
    p = &mappings[nr_mappings++];
    p->phys = start_addr;
    p->ram = phys_offset;
    p->len = size;
#endif

    return;
}
int kvm_setup_guest_memory(void *area, unsigned long size)
{
    int ret = 0;

#ifdef MADV_DONTFORK
    if (kvm_enabled() && !kvm_has_sync_mmu())
        ret = madvise(area, size, MADV_DONTFORK);
#endif

    if (ret)
        perror("madvise");

    return ret;
}

int kvm_qemu_check_extension(int ext)
{
    return kvm_check_extension(kvm_state, ext);
}

int kvm_qemu_init_env(CPUState *cenv)
{
    return kvm_arch_qemu_init_env(cenv);
}
#ifdef KVM_CAP_SET_GUEST_DEBUG

struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg;
    int err;
};

static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *dbg_data = data;

    dbg_data->err = kvm_set_guest_debug(cpu_single_env->kvm_cpu_state.vcpu_ctx,
                                        &dbg_data->dbg);
}

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data data;

    data.dbg.control = 0;
    if (env->singlestep_enabled)
        data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(env, &data.dbg);
    data.dbg.control |= reinject_trap;

    on_vcpu(env, kvm_invoke_set_guest_debug, &data);
    return data.err;
}

#endif
/*
 * dirty pages logging
 */
/* FIXME: use unsigned long pointer instead of unsigned char */
unsigned char *kvm_dirty_bitmap = NULL;
int kvm_physical_memory_set_dirty_tracking(int enable)
{
    int r = 0;

    if (!kvm_enabled())
        return 0;

    if (enable) {
        if (!kvm_dirty_bitmap) {
            unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
            kvm_dirty_bitmap = qemu_malloc(bitmap_size);
            if (kvm_dirty_bitmap == NULL) {
                perror("Failed to allocate dirty pages bitmap");
                r = -1;
            }
            else {
                r = kvm_dirty_pages_log_enable_all(kvm_context);
            }
        }
    }
    else {
        if (kvm_dirty_bitmap) {
            r = kvm_dirty_pages_log_reset(kvm_context);
            qemu_free(kvm_dirty_bitmap);
            kvm_dirty_bitmap = NULL;
        }
    }
    return r;
}
/* get kvm's dirty pages bitmap and update qemu's */
static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
                                         unsigned char *bitmap,
                                         unsigned long offset,
                                         unsigned long mem_size)
{
    unsigned int i, j, n = 0;
    unsigned char c;
    unsigned long page_number, addr, addr1;
    ram_addr_t ram_addr;
    unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + 7) / 8;

    /*
     * bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty.
     */
    for (i = 0; i < len; i++) {
        c = bitmap[i];
        while (c > 0) {
            j = ffsl(c) - 1;
            c &= ~(1u << j);
            page_number = i * 8 + j;
            addr1 = page_number * TARGET_PAGE_SIZE;
            addr = offset + addr1;
            ram_addr = cpu_get_physical_page_desc(addr);
            cpu_physical_memory_set_dirty(ram_addr);
            n++;
        }
    }
    return 0;
}
static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                                   void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}

/*
 * get kvm's dirty pages bitmap and update qemu's
 * we only care about physical ram, which resides in slots 0 and 3
 */
int kvm_update_dirty_pages_log(void)
{
    int r = 0;

    r = kvm_get_dirty_pages_range(kvm_context, 0, -1UL,
                                  NULL,
                                  kvm_get_dirty_bitmap_cb);
    return r;
}
void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
                         int log)
{
    if (log)
        kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
    else {
#ifdef TARGET_I386
        if (must_use_aliases_target(start))
            return;
#endif
        kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
    }
}

int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
{
    unsigned int bsize = BITMAP_SIZE(phys_ram_size);
    unsigned int brsize = BITMAP_SIZE(ram_size);
    unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
    unsigned int extra_bytes = (extra_pages + 7) / 8;
    unsigned int hole_start = BITMAP_SIZE(0xa0000);
    unsigned int hole_end = BITMAP_SIZE(0xc0000);

    memset(bitmap, 0xFF, brsize + extra_bytes);
    memset(bitmap + hole_start, 0, hole_end - hole_start);
    memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);

    return 0;
}
#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq(int irq, int level, int *status)
{
    return kvm_set_irq_level(kvm_context, irq, level, status);
}

#endif

int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
{
    return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
}

void kvm_mutex_unlock(void)
{
    assert(!cpu_single_env);
    pthread_mutex_unlock(&qemu_mutex);
}

void kvm_mutex_lock(void)
{
    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = NULL;
}
#ifdef USE_KVM_DEVICE_ASSIGNMENT
void kvm_add_ioperm_data(struct ioperm_data *data)
{
    LIST_INSERT_HEAD(&ioperm_head, data, entries);
}

void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
{
    struct ioperm_data *data;

    data = LIST_FIRST(&ioperm_head);
    while (data) {
        struct ioperm_data *next = LIST_NEXT(data, entries);

        if (data->start_port == start_port && data->num == num) {
            LIST_REMOVE(data, entries);
            qemu_free(data);
        }

        data = next;
    }
}

void kvm_ioperm(CPUState *env, void *data)
{
    if (kvm_enabled() && qemu_system_ready)
        on_vcpu(env, kvm_arch_do_ioperm, data);
}

#endif
int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                   target_phys_addr_t end_addr)
{
#ifndef TARGET_IA64

#ifdef TARGET_I386
    if (must_use_aliases_source(start_addr))
        return 0;
#endif

    kvm_get_dirty_pages_range(kvm_context, start_addr, end_addr - start_addr,
                              NULL, kvm_get_dirty_bitmap_cb);
#endif
    return 0;
}

int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
{
#ifdef TARGET_I386
    if (must_use_aliases_source(phys_addr))
        return 0;
#endif

#ifndef TARGET_IA64
    kvm_qemu_log_memory(phys_addr, len, 1);
#endif
    return 0;
}

int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
{
#ifdef TARGET_I386
    if (must_use_aliases_source(phys_addr))
        return 0;
#endif

#ifndef TARGET_IA64
    kvm_qemu_log_memory(phys_addr, len, 0);
#endif
    return 0;
}

void qemu_kvm_cpu_stop(CPUState *env)
{
    if (kvm_enabled())
        env->kvm_cpu_state.stopped = 1;
}

int kvm_set_boot_cpu_id(uint32_t id)
{
    return kvm_set_boot_vcpu_id(kvm_context, id);
}
#ifdef TARGET_I386
#ifdef KVM_CAP_MCE
struct kvm_x86_mce_data
{
    CPUState *env;
    struct kvm_x86_mce *mce;
};

static void kvm_do_inject_x86_mce(void *_data)
{
    struct kvm_x86_mce_data *data = _data;
    int r;

    r = kvm_set_mce(data->env->kvm_cpu_state.vcpu_ctx, data->mce);
    if (r < 0)
        perror("kvm_set_mce FAILED");
}
#endif

void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
                        uint64_t mcg_status, uint64_t addr, uint64_t misc)
{
#ifdef KVM_CAP_MCE
    struct kvm_x86_mce mce = {
        .bank = bank,
        .status = status,
        .mcg_status = mcg_status,
        .addr = addr,
        .misc = misc,
    };
    struct kvm_x86_mce_data data = {
        .env = cenv,
        .mce = &mce,
    };

    on_vcpu(cenv, kvm_do_inject_x86_mce, &data);
#endif
}
#endif