provide env->kvm_fd
[qemu-kvm/fedora.git] / qemu-kvm.c
blob 6897e3c09a21c01fbfe5893aba5ad05a1aba92df
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
21 #include "qemu-kvm.h"
22 #include "libkvm.h"
24 #include <pthread.h>
25 #include <sys/utsname.h>
26 #include <sys/syscall.h>
27 #include <sys/mman.h>
28 #include <sys/ioctl.h>
29 #include <signal.h>
31 #define false 0
32 #define true 1
34 #define EXPECTED_KVM_API_VERSION 12
36 #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
37 #error libkvm: userspace and kernel version mismatch
38 #endif
40 int kvm_allowed = 1;
41 int kvm_irqchip = 1;
42 int kvm_pit = 1;
43 int kvm_pit_reinject = 1;
44 int kvm_nested = 0;
47 static KVMState *kvm_state;
48 kvm_context_t kvm_context;
50 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
51 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
52 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
53 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
54 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
55 __thread CPUState *current_env;
57 static int qemu_system_ready;
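/* SIG_IPI: real-time signal used to kick a vcpu thread, either out of
 * KVM_RUN or out of its sigtimedwait() in kvm_main_loop_wait(), so that
 * it re-checks queued work and stop requests. */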
59 #define SIG_IPI (SIGRTMIN+4)
61 pthread_t io_thread;
62 static int io_thread_fd = -1;
63 static int io_thread_sigfd = -1;
65 static CPUState *kvm_debug_cpu_requested;
67 static uint64_t phys_ram_size;
69 /* The list of ioperm_data */
70 static LIST_HEAD(, ioperm_data) ioperm_head;
72 //#define DEBUG_MEMREG
73 #ifdef DEBUG_MEMREG
74 #define DPRINTF(fmt, args...) \
75 do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0)
76 #else
77 #define DPRINTF(fmt, args...) do {} while (0)
78 #endif
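/* Round x up to the next multiple of y (y must be a power of two). */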
80 #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
82 int kvm_abi = EXPECTED_KVM_API_VERSION;
83 int kvm_page_size;
85 #ifdef KVM_CAP_SET_GUEST_DEBUG
86 static int kvm_debug(void *opaque, void *data,
87 struct kvm_debug_exit_arch *arch_info)
89 int handle = kvm_arch_debug(arch_info);
90 CPUState *env = data;
92 if (handle) {
93 kvm_debug_cpu_requested = env;
94 env->kvm_cpu_state.stopped = 1;
96 return handle;
98 #endif
100 static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
102 *data = cpu_inb(0, addr);
103 return 0;
106 static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
108 *data = cpu_inw(0, addr);
109 return 0;
112 static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
114 *data = cpu_inl(0, addr);
115 return 0;
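/* Writes to port 0xb2 (the APM control port) are intercepted in kvm_outb()
 * below: the guest BIOS writes 0xf0/0xf1 there to request that ACPI be
 * disabled/enabled, which is translated into clearing/setting bit 0 of the
 * PM control register at PM_IO_BASE + 4. */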
118 #define PM_IO_BASE 0xb000
120 static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
122 if (addr == 0xb2) {
123 switch (data) {
124 case 0: {
125 cpu_outb(0, 0xb3, 0);
126 break;
128 case 0xf0: {
129 unsigned x;
131 /* disable acpi */
132 x = cpu_inw(0, PM_IO_BASE + 4);
133 x &= ~1;
134 cpu_outw(0, PM_IO_BASE + 4, x);
135 break;
137 case 0xf1: {
138 unsigned x;
140 /* enable acpi */
141 x = cpu_inw(0, PM_IO_BASE + 4);
142 x |= 1;
143 cpu_outw(0, PM_IO_BASE + 4, x);
144 break;
146 default:
147 break;
149 return 0;
151 cpu_outb(0, addr, data);
152 return 0;
155 static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
157 cpu_outw(0, addr, data);
158 return 0;
161 static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
163 cpu_outl(0, addr, data);
164 return 0;
167 int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
169 cpu_physical_memory_rw(addr, data, len, 0);
170 return 0;
173 int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
175 cpu_physical_memory_rw(addr, data, len, 1);
176 return 0;
179 static int handle_unhandled(kvm_context_t kvm, kvm_vcpu_context_t vcpu,
180 uint64_t reason)
182 fprintf(stderr, "kvm: unhandled exit %"PRIx64"\n", reason);
183 return -EINVAL;
187 static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
189 uint32_t *bitmap = kvm->used_gsi_bitmap;
191 if (gsi < kvm->max_gsi)
192 bitmap[gsi / 32] |= 1U << (gsi % 32);
193 else
194 DPRINTF("Invalid GSI %u\n", gsi);
197 static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
199 uint32_t *bitmap = kvm->used_gsi_bitmap;
201 if (gsi < kvm->max_gsi)
202 bitmap[gsi / 32] &= ~(1U << (gsi % 32));
203 else
204 DPRINTF("Invalid GSI %u\n", gsi);
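/* Userspace mirror of the kernel's memory slots: each entry records the
 * guest-physical range, host userspace address and flags last passed to
 * KVM_SET_USER_MEMORY_REGION for that slot. */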
207 struct slot_info {
208 unsigned long phys_addr;
209 unsigned long len;
210 unsigned long userspace_addr;
211 unsigned flags;
212 int logging_count;
215 struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
217 static void init_slots(void)
219 int i;
221 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
222 slots[i].len = 0;
225 static int get_free_slot(kvm_context_t kvm)
227 int i;
228 int tss_ext;
230 #if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
231 tss_ext = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
232 #else
233 tss_ext = 0;
234 #endif
237 * on older kernels where the set tss ioctl is not supported we must save
238 * slot 0 to hold the extended memory, as the vmx will use the last 3
239 * pages of this slot.
241 if (tss_ext > 0)
242 i = 0;
243 else
244 i = 1;
246 for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
247 if (!slots[i].len)
248 return i;
249 return -1;
252 static void register_slot(int slot, unsigned long phys_addr, unsigned long len,
253 unsigned long userspace_addr, unsigned flags)
255 slots[slot].phys_addr = phys_addr;
256 slots[slot].len = len;
257 slots[slot].userspace_addr = userspace_addr;
258 slots[slot].flags = flags;
261 static void free_slot(int slot)
263 slots[slot].len = 0;
264 slots[slot].logging_count = 0;
267 static int get_slot(unsigned long phys_addr)
269 int i;
271 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) {
272 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
273 (slots[i].phys_addr + slots[i].len-1) >= phys_addr)
274 return i;
276 return -1;
279 /* Returns -1 if this region is not wholly contained within any slot,
280 * and the number of the containing slot otherwise */
281 static int get_container_slot(uint64_t phys_addr, unsigned long size)
283 int i;
285 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i)
286 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
287 (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
288 return i;
289 return -1;
292 int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size)
294 int slot = get_container_slot(phys_addr, size);
295 if (slot == -1)
296 return 0;
297 return 1;
301 * dirty pages logging control
303 static int kvm_dirty_pages_log_change(kvm_context_t kvm,
304 unsigned long phys_addr,
305 unsigned flags,
306 unsigned mask)
308 int r = -1;
309 int slot = get_slot(phys_addr);
311 if (slot == -1) {
312 fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
313 return 1;
316 flags = (slots[slot].flags & ~mask) | flags;
317 if (flags == slots[slot].flags)
318 return 0;
319 slots[slot].flags = flags;
322 struct kvm_userspace_memory_region mem = {
323 .slot = slot,
324 .memory_size = slots[slot].len,
325 .guest_phys_addr = slots[slot].phys_addr,
326 .userspace_addr = slots[slot].userspace_addr,
327 .flags = slots[slot].flags,
331 DPRINTF("slot %d start %llx len %llx flags %x\n",
332 mem.slot,
333 mem.guest_phys_addr,
334 mem.memory_size,
335 mem.flags);
336 r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
337 if (r == -1)
338 fprintf(stderr, "%s: %m\n", __FUNCTION__);
340 return r;
343 static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
344 int (*change)(kvm_context_t kvm,
345 uint64_t start,
346 uint64_t len))
348 int i, r;
350 for (i=r=0; i<KVM_MAX_NUM_MEM_REGIONS && r==0; i++) {
351 if (slots[i].len)
352 r = change(kvm, slots[i].phys_addr, slots[i].len);
354 return r;
357 int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm,
358 uint64_t phys_addr,
359 uint64_t len)
361 int slot = get_slot(phys_addr);
363 DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len);
364 if (slot == -1) {
365 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
366 return -EINVAL;
369 if (slots[slot].logging_count++)
370 return 0;
372 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
373 KVM_MEM_LOG_DIRTY_PAGES,
374 KVM_MEM_LOG_DIRTY_PAGES);
377 int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm,
378 uint64_t phys_addr,
379 uint64_t len)
381 int slot = get_slot(phys_addr);
383 if (slot == -1) {
384 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
385 return -EINVAL;
388 if (--slots[slot].logging_count)
389 return 0;
391 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
393 KVM_MEM_LOG_DIRTY_PAGES);
397 * Enable dirty page logging for all memory regions
399 int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
401 if (kvm->dirty_pages_log_all)
402 return 0;
403 kvm->dirty_pages_log_all = 1;
404 return kvm_dirty_pages_log_change_all(kvm,
405 kvm_dirty_pages_log_enable_slot);
409 * Enable dirty page logging only for memory regions that were created with
410 * dirty logging enabled (disable for all other memory regions).
412 int kvm_dirty_pages_log_reset(kvm_context_t kvm)
414 if (!kvm->dirty_pages_log_all)
415 return 0;
416 kvm->dirty_pages_log_all = 0;
417 return kvm_dirty_pages_log_change_all(kvm,
418 kvm_dirty_pages_log_disable_slot);
422 int kvm_init(int smp_cpus)
424 int fd;
425 int r, gsi_count;
428 fd = open("/dev/kvm", O_RDWR);
429 if (fd == -1) {
430 perror("open /dev/kvm");
431 return -1;
433 r = ioctl(fd, KVM_GET_API_VERSION, 0);
434 if (r == -1) {
435 fprintf(stderr, "kvm kernel version too old: "
436 "KVM_GET_API_VERSION ioctl not supported\n");
437 goto out_close;
439 if (r < EXPECTED_KVM_API_VERSION) {
440 fprintf(stderr, "kvm kernel version too old: "
441 "We expect API version %d or newer, but got "
442 "version %d\n",
443 EXPECTED_KVM_API_VERSION, r);
444 goto out_close;
446 if (r > EXPECTED_KVM_API_VERSION) {
447 fprintf(stderr, "kvm userspace version too old\n");
448 goto out_close;
450 kvm_abi = r;
451 kvm_page_size = getpagesize();
452 kvm_state = qemu_mallocz(sizeof(*kvm_state));
453 kvm_context = &kvm_state->kvm_context;
455 kvm_context->fd = fd;
456 kvm_context->vm_fd = -1;
457 kvm_context->opaque = cpu_single_env;
458 kvm_context->dirty_pages_log_all = 0;
459 kvm_context->no_irqchip_creation = 0;
460 kvm_context->no_pit_creation = 0;
462 gsi_count = kvm_get_gsi_count(kvm_context);
463 if (gsi_count > 0) {
464 int gsi_bits, i;
466 /* Round up so we can search ints using ffs */
467 gsi_bits = ALIGN(gsi_count, 32);
468 kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
469 kvm_context->max_gsi = gsi_bits;
471 /* Mark any over-allocated bits as already in use */
472 for (i = gsi_count; i < gsi_bits; i++)
473 set_gsi(kvm_context, i);
476 pthread_mutex_lock(&qemu_mutex);
477 return 0;
479 out_close:
480 close(fd);
481 return -1;
484 static void kvm_finalize(KVMState *s)
486 /* FIXME
487 if (kvm->vcpu_fd[0] != -1)
488 close(kvm->vcpu_fd[0]);
489 if (kvm->vm_fd != -1)
490 close(kvm->vm_fd);
492 close(s->kvm_context.fd);
493 free(s);
496 void kvm_disable_irqchip_creation(kvm_context_t kvm)
498 kvm->no_irqchip_creation = 1;
501 void kvm_disable_pit_creation(kvm_context_t kvm)
503 kvm->no_pit_creation = 1;
506 kvm_vcpu_context_t kvm_create_vcpu(CPUState *env, int id)
508 long mmap_size;
509 int r;
510 kvm_vcpu_context_t vcpu_ctx = qemu_malloc(sizeof(struct kvm_vcpu_context));
511 kvm_context_t kvm = kvm_context;
513 vcpu_ctx->kvm = kvm;
514 vcpu_ctx->id = id;
516 r = ioctl(kvm->vm_fd, KVM_CREATE_VCPU, id);
517 if (r == -1) {
518 fprintf(stderr, "kvm_create_vcpu: %m\n");
519 goto err;
521 vcpu_ctx->fd = r;
523 env->kvm_fd = r;
524 env->kvm_state = kvm_state;
526 mmap_size = ioctl(kvm->fd, KVM_GET_VCPU_MMAP_SIZE, 0);
527 if (mmap_size == -1) {
528 fprintf(stderr, "get vcpu mmap size: %m\n");
529 goto err_fd;
531 vcpu_ctx->run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED,
532 vcpu_ctx->fd, 0);
533 if (vcpu_ctx->run == MAP_FAILED) {
534 fprintf(stderr, "mmap vcpu area: %m\n");
535 goto err_fd;
537 return vcpu_ctx;
538 err_fd:
539 close(vcpu_ctx->fd);
540 err:
541 free(vcpu_ctx);
542 return NULL;
545 static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
547 #ifdef KVM_CAP_SET_BOOT_CPU_ID
548 int r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
549 if (r > 0)
550 return ioctl(kvm->vm_fd, KVM_SET_BOOT_CPU_ID, id);
551 return -ENOSYS;
552 #else
553 return -ENOSYS;
554 #endif
557 int kvm_create_vm(kvm_context_t kvm)
559 int fd = kvm->fd;
561 #ifdef KVM_CAP_IRQ_ROUTING
562 kvm->irq_routes = qemu_mallocz(sizeof(*kvm->irq_routes));
563 kvm->nr_allocated_irq_routes = 0;
564 #endif
566 fd = ioctl(fd, KVM_CREATE_VM, 0);
567 if (fd == -1) {
568 fprintf(stderr, "kvm_create_vm: %m\n");
569 return -1;
571 kvm->vm_fd = fd;
572 return 0;
575 static int kvm_create_default_phys_mem(kvm_context_t kvm,
576 unsigned long phys_mem_bytes,
577 void **vm_mem)
579 #ifdef KVM_CAP_USER_MEMORY
580 int r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
581 if (r > 0)
582 return 0;
583 fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
584 #else
585 #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
586 #endif
587 return -1;
590 int kvm_check_extension(kvm_context_t kvm, int ext)
592 int ret;
594 ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, ext);
595 if (ret > 0)
596 return ret;
597 return 0;
600 void kvm_create_irqchip(kvm_context_t kvm)
602 int r;
604 kvm->irqchip_in_kernel = 0;
605 #ifdef KVM_CAP_IRQCHIP
606 if (!kvm->no_irqchip_creation) {
607 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
608 if (r > 0) { /* kernel irqchip supported */
609 r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
610 if (r >= 0) {
611 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
612 #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
613 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
614 KVM_CAP_IRQ_INJECT_STATUS);
615 if (r > 0)
616 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
617 #endif
618 kvm->irqchip_in_kernel = 1;
620 else
621 fprintf(stderr, "Create kernel PIC irqchip failed\n");
624 #endif
627 int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
629 int r;
631 r = kvm_create_vm(kvm);
632 if (r < 0)
633 return r;
634 r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
635 if (r < 0)
636 return r;
637 init_slots();
638 r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
639 if (r < 0)
640 return r;
641 kvm_create_irqchip(kvm);
643 return 0;
647 int kvm_register_phys_mem(kvm_context_t kvm,
648 unsigned long phys_start, void *userspace_addr,
649 unsigned long len, int log)
652 struct kvm_userspace_memory_region memory = {
653 .memory_size = len,
654 .guest_phys_addr = phys_start,
655 .userspace_addr = (unsigned long)(intptr_t)userspace_addr,
656 .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
658 int r;
660 memory.slot = get_free_slot(kvm);
661 DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n",
662 memory.guest_phys_addr, memory.memory_size,
663 memory.userspace_addr, memory.slot, memory.flags);
664 r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
665 if (r == -1) {
666 fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(errno));
667 return -1;
669 register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
670 memory.userspace_addr, memory.flags);
671 return 0;
675 /* destroy/free a whole slot.
676 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
678 void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
679 unsigned long len)
681 int slot;
682 int r;
683 struct kvm_userspace_memory_region memory = {
684 .memory_size = 0,
685 .guest_phys_addr = phys_start,
686 .userspace_addr = 0,
687 .flags = 0,
690 slot = get_slot(phys_start);
692 if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
693 fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
694 __FUNCTION__, slot);
695 return;
697 if (phys_start != slots[slot].phys_addr) {
698 fprintf(stderr,
699 "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
700 __FUNCTION__, phys_start, slots[slot].phys_addr);
701 phys_start = slots[slot].phys_addr;
704 memory.slot = slot;
705 DPRINTF("slot %d start %llx len %llx flags %x\n",
706 memory.slot,
707 memory.guest_phys_addr,
708 memory.memory_size,
709 memory.flags);
710 r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
711 if (r == -1) {
712 fprintf(stderr, "destroy_userspace_phys_mem: %s",
713 strerror(errno));
714 return;
717 free_slot(memory.slot);
720 void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size)
723 int slot = get_container_slot(phys_addr, size);
725 if (slot != -1) {
726 DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size);
727 kvm_destroy_phys_mem(kvm, phys_addr, size);
728 return;
732 static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
734 int r;
735 struct kvm_dirty_log log = {
736 .slot = slot,
739 log.dirty_bitmap = buf;
741 r = ioctl(kvm->vm_fd, ioctl_num, &log);
742 if (r == -1)
743 return -errno;
744 return 0;
747 int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
749 int slot;
751 slot = get_slot(phys_addr);
752 return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
755 int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
756 unsigned long len, void *opaque,
757 int (*cb)(unsigned long start, unsigned long len,
758 void*bitmap, void *opaque))
760 int i;
761 int r;
762 unsigned long end_addr = phys_addr + len;
763 void *buf;
765 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
766 if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr)
767 && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) {
768 buf = qemu_malloc((slots[i].len / 4096 + 7) / 8 + 2);
769 r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
770 if (r) {
771 qemu_free(buf);
772 return r;
774 r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
775 qemu_free(buf);
776 if (r)
777 return r;
780 return 0;
783 #ifdef KVM_CAP_IRQCHIP
785 int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
787 struct kvm_irq_level event;
788 int r;
790 if (!kvm->irqchip_in_kernel)
791 return 0;
792 event.level = level;
793 event.irq = irq;
794 r = ioctl(kvm->vm_fd, kvm->irqchip_inject_ioctl, &event);
795 if (r == -1)
796 perror("kvm_set_irq_level");
798 if (status) {
799 #ifdef KVM_CAP_IRQ_INJECT_STATUS
800 *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ?
801 1 : event.status;
802 #else
803 *status = 1;
804 #endif
807 return 1;
810 int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
812 int r;
814 if (!kvm->irqchip_in_kernel)
815 return 0;
816 r = ioctl(kvm->vm_fd, KVM_GET_IRQCHIP, chip);
817 if (r == -1) {
818 r = -errno;
819 perror("kvm_get_irqchip");
821 return r;
824 int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
826 int r;
828 if (!kvm->irqchip_in_kernel)
829 return 0;
830 r = ioctl(kvm->vm_fd, KVM_SET_IRQCHIP, chip);
831 if (r == -1) {
832 r = -errno;
833 perror("kvm_set_irqchip");
835 return r;
838 #endif
840 static int handle_io(kvm_vcpu_context_t vcpu)
842 struct kvm_run *run = vcpu->run;
843 kvm_context_t kvm = vcpu->kvm;
844 uint16_t addr = run->io.port;
845 int r;
846 int i;
847 void *p = (void *)run + run->io.data_offset;
849 for (i = 0; i < run->io.count; ++i) {
850 switch (run->io.direction) {
851 case KVM_EXIT_IO_IN:
852 switch (run->io.size) {
853 case 1:
854 r = kvm_inb(kvm->opaque, addr, p);
855 break;
856 case 2:
857 r = kvm_inw(kvm->opaque, addr, p);
858 break;
859 case 4:
860 r = kvm_inl(kvm->opaque, addr, p);
861 break;
862 default:
863 fprintf(stderr, "bad I/O size %d\n", run->io.size);
864 return -EMSGSIZE;
866 break;
867 case KVM_EXIT_IO_OUT:
868 switch (run->io.size) {
869 case 1:
870 r = kvm_outb(kvm->opaque, addr,
871 *(uint8_t *)p);
872 break;
873 case 2:
874 r = kvm_outw(kvm->opaque, addr,
875 *(uint16_t *)p);
876 break;
877 case 4:
878 r = kvm_outl(kvm->opaque, addr,
879 *(uint32_t *)p);
880 break;
881 default:
882 fprintf(stderr, "bad I/O size %d\n", run->io.size);
883 return -EMSGSIZE;
885 break;
886 default:
887 fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
888 return -EPROTO;
891 p += run->io.size;
894 return 0;
897 int handle_debug(kvm_vcpu_context_t vcpu, void *env)
899 #ifdef KVM_CAP_SET_GUEST_DEBUG
900 struct kvm_run *run = vcpu->run;
901 kvm_context_t kvm = vcpu->kvm;
903 return kvm_debug(kvm->opaque, env, &run->debug.arch);
904 #else
905 return 0;
906 #endif
909 int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
911 return ioctl(vcpu->fd, KVM_GET_REGS, regs);
914 int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
916 return ioctl(vcpu->fd, KVM_SET_REGS, regs);
919 int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
921 return ioctl(vcpu->fd, KVM_GET_FPU, fpu);
924 int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
926 return ioctl(vcpu->fd, KVM_SET_FPU, fpu);
929 int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
931 return ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
934 int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
936 return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
939 #ifdef KVM_CAP_MP_STATE
940 int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
942 int r;
944 r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
945 if (r > 0)
946 return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state);
947 return -ENOSYS;
950 int kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
952 int r;
954 r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
955 if (r > 0)
956 return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
957 return -ENOSYS;
959 #endif
961 static int handle_mmio(kvm_vcpu_context_t vcpu)
963 unsigned long addr = vcpu->run->mmio.phys_addr;
964 kvm_context_t kvm = vcpu->kvm;
965 struct kvm_run *kvm_run = vcpu->run;
966 void *data = kvm_run->mmio.data;
968 /* hack: Red Hat 7.1 generates these weird accesses. */
969 if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
970 return 0;
972 if (kvm_run->mmio.is_write)
973 return kvm_mmio_write(kvm->opaque, addr, data,
974 kvm_run->mmio.len);
975 else
976 return kvm_mmio_read(kvm->opaque, addr, data,
977 kvm_run->mmio.len);
980 int handle_io_window(kvm_context_t kvm)
982 return 1;
985 int handle_halt(kvm_vcpu_context_t vcpu)
987 return kvm_arch_halt(vcpu->kvm->opaque, vcpu);
990 int handle_shutdown(kvm_context_t kvm, CPUState *env)
992 /* stop the current vcpu from going back to guest mode */
993 env->kvm_cpu_state.stopped = 1;
995 qemu_system_reset_request();
996 return 1;
999 static inline void push_nmi(kvm_context_t kvm)
1001 #ifdef KVM_CAP_USER_NMI
1002 kvm_arch_push_nmi(kvm->opaque);
1003 #endif /* KVM_CAP_USER_NMI */
1006 void post_kvm_run(kvm_context_t kvm, CPUState *env)
1008 pthread_mutex_lock(&qemu_mutex);
1009 kvm_arch_post_kvm_run(kvm->opaque, env);
1012 int pre_kvm_run(kvm_context_t kvm, CPUState *env)
1014 kvm_arch_pre_kvm_run(kvm->opaque, env);
1016 if (env->exit_request)
1017 return 1;
1018 pthread_mutex_unlock(&qemu_mutex);
1019 return 0;
1022 int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu)
1024 return vcpu->run->if_flag;
1027 int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu)
1029 return vcpu->run->ready_for_interrupt_injection;
1032 int kvm_run(kvm_vcpu_context_t vcpu, void *env)
1034 int r;
1035 int fd = vcpu->fd;
1036 struct kvm_run *run = vcpu->run;
1037 kvm_context_t kvm = vcpu->kvm;
1039 again:
1040 push_nmi(kvm);
1041 #if !defined(__s390__)
1042 if (!kvm->irqchip_in_kernel)
1043 run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
1044 #endif
1045 r = pre_kvm_run(kvm, env);
1046 if (r)
1047 return r;
1048 r = ioctl(fd, KVM_RUN, 0);
1050 if (r == -1 && errno != EINTR && errno != EAGAIN) {
1051 r = -errno;
1052 post_kvm_run(kvm, env);
1053 fprintf(stderr, "kvm_run: %s\n", strerror(-r));
1054 return r;
1057 post_kvm_run(kvm, env);
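/* Drain the coalesced MMIO ring: the kernel batches writes to registered
 * MMIO zones into a ring located in the vcpu's mmap'ed run area, and they
 * are replayed here after every KVM_RUN. */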
1059 #if defined(KVM_CAP_COALESCED_MMIO)
1060 if (kvm->coalesced_mmio) {
1061 struct kvm_coalesced_mmio_ring *ring = (void *)run +
1062 kvm->coalesced_mmio * PAGE_SIZE;
1063 while (ring->first != ring->last) {
1064 kvm_mmio_write(kvm->opaque,
1065 ring->coalesced_mmio[ring->first].phys_addr,
1066 &ring->coalesced_mmio[ring->first].data[0],
1067 ring->coalesced_mmio[ring->first].len);
1068 smp_wmb();
1069 ring->first = (ring->first + 1) %
1070 KVM_COALESCED_MMIO_MAX;
1073 #endif
1075 #if !defined(__s390__)
1076 if (r == -1) {
1077 r = handle_io_window(kvm);
1078 goto more;
1080 #endif
1081 if (1) {
1082 switch (run->exit_reason) {
1083 case KVM_EXIT_UNKNOWN:
1084 r = handle_unhandled(kvm, vcpu,
1085 run->hw.hardware_exit_reason);
1086 break;
1087 case KVM_EXIT_FAIL_ENTRY:
1088 r = handle_unhandled(kvm, vcpu,
1089 run->fail_entry.hardware_entry_failure_reason);
1090 break;
1091 case KVM_EXIT_EXCEPTION:
1092 fprintf(stderr, "exception %d (%x)\n",
1093 run->ex.exception,
1094 run->ex.error_code);
1095 kvm_show_regs(vcpu);
1096 kvm_show_code(vcpu);
1097 abort();
1098 break;
1099 case KVM_EXIT_IO:
1100 r = handle_io(vcpu);
1101 break;
1102 case KVM_EXIT_DEBUG:
1103 r = handle_debug(vcpu, env);
1104 break;
1105 case KVM_EXIT_MMIO:
1106 r = handle_mmio(vcpu);
1107 break;
1108 case KVM_EXIT_HLT:
1109 r = handle_halt(vcpu);
1110 break;
1111 case KVM_EXIT_IRQ_WINDOW_OPEN:
1112 break;
1113 case KVM_EXIT_SHUTDOWN:
1114 r = handle_shutdown(kvm, env);
1115 break;
1116 #if defined(__s390__)
1117 case KVM_EXIT_S390_SIEIC:
1118 r = kvm_s390_handle_intercept(kvm, vcpu,
1119 run);
1120 break;
1121 case KVM_EXIT_S390_RESET:
1122 r = kvm_s390_handle_reset(kvm, vcpu, run);
1123 break;
1124 #endif
1125 default:
1126 if (kvm_arch_run(vcpu)) {
1127 fprintf(stderr, "unhandled vm exit: 0x%x\n",
1128 run->exit_reason);
1129 kvm_show_regs(vcpu);
1130 abort();
1132 break;
1135 more:
1136 if (!r)
1137 goto again;
1138 return r;
1141 int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq)
1143 struct kvm_interrupt intr;
1145 intr.irq = irq;
1146 return ioctl(vcpu->fd, KVM_INTERRUPT, &intr);
1149 #ifdef KVM_CAP_SET_GUEST_DEBUG
1150 int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg)
1152 return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg);
1154 #endif
1156 int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset)
1158 struct kvm_signal_mask *sigmask;
1159 int r;
1161 if (!sigset) {
1162 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL);
1163 if (r == -1)
1164 r = -errno;
1165 return r;
1167 sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));
1169 sigmask->len = 8;
1170 memcpy(sigmask->sigset, sigset, sizeof(*sigset));
1171 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask);
1172 if (r == -1)
1173 r = -errno;
1174 free(sigmask);
1175 return r;
1178 int kvm_irqchip_in_kernel(kvm_context_t kvm)
1180 return kvm->irqchip_in_kernel;
1183 int kvm_pit_in_kernel(kvm_context_t kvm)
1185 return kvm->pit_in_kernel;
1188 int kvm_has_sync_mmu(void)
1190 int r = 0;
1191 #ifdef KVM_CAP_SYNC_MMU
1192 r = ioctl(kvm_context->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
1193 #endif
1194 return r;
1197 int kvm_inject_nmi(kvm_vcpu_context_t vcpu)
1199 #ifdef KVM_CAP_USER_NMI
1200 return ioctl(vcpu->fd, KVM_NMI);
1201 #else
1202 return -ENOSYS;
1203 #endif
1206 int kvm_init_coalesced_mmio(kvm_context_t kvm)
1208 int r = 0;
1209 kvm->coalesced_mmio = 0;
1210 #ifdef KVM_CAP_COALESCED_MMIO
1211 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
1212 if (r > 0) {
1213 kvm->coalesced_mmio = r;
1214 return 0;
1216 #endif
1217 return r;
1220 int kvm_coalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
1222 #ifdef KVM_CAP_COALESCED_MMIO
1223 kvm_context_t kvm = kvm_context;
1224 struct kvm_coalesced_mmio_zone zone;
1225 int r;
1227 if (kvm->coalesced_mmio) {
1229 zone.addr = addr;
1230 zone.size = size;
1232 r = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
1233 if (r == -1) {
1234 perror("kvm_register_coalesced_mmio_zone");
1235 return -errno;
1237 return 0;
1239 #endif
1240 return -ENOSYS;
1243 int kvm_uncoalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
1245 #ifdef KVM_CAP_COALESCED_MMIO
1246 kvm_context_t kvm = kvm_context;
1247 struct kvm_coalesced_mmio_zone zone;
1248 int r;
1250 if (kvm->coalesced_mmio) {
1252 zone.addr = addr;
1253 zone.size = size;
1255 r = ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone);
1256 if (r == -1) {
1257 perror("kvm_unregister_coalesced_mmio_zone");
1258 return -errno;
1260 DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size);
1261 return 0;
1263 #endif
1264 return -ENOSYS;
1267 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
1268 int kvm_assign_pci_device(kvm_context_t kvm,
1269 struct kvm_assigned_pci_dev *assigned_dev)
1271 int ret;
1273 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
1274 if (ret < 0)
1275 return -errno;
1277 return ret;
1280 static int kvm_old_assign_irq(kvm_context_t kvm,
1281 struct kvm_assigned_irq *assigned_irq)
1283 int ret;
1285 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq);
1286 if (ret < 0)
1287 return -errno;
1289 return ret;
1292 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
1293 int kvm_assign_irq(kvm_context_t kvm,
1294 struct kvm_assigned_irq *assigned_irq)
1296 int ret;
1298 ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
1299 if (ret > 0) {
1300 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_DEV_IRQ, assigned_irq);
1301 if (ret < 0)
1302 return -errno;
1303 return ret;
1306 return kvm_old_assign_irq(kvm, assigned_irq);
1309 int kvm_deassign_irq(kvm_context_t kvm,
1310 struct kvm_assigned_irq *assigned_irq)
1312 int ret;
1314 ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
1315 if (ret < 0)
1316 return -errno;
1318 return ret;
1320 #else
1321 int kvm_assign_irq(kvm_context_t kvm,
1322 struct kvm_assigned_irq *assigned_irq)
1324 return kvm_old_assign_irq(kvm, assigned_irq);
1326 #endif
1327 #endif
1329 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1330 int kvm_deassign_pci_device(kvm_context_t kvm,
1331 struct kvm_assigned_pci_dev *assigned_dev)
1333 int ret;
1335 ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
1336 if (ret < 0)
1337 return -errno;
1339 return ret;
1341 #endif
1343 int kvm_destroy_memory_region_works(kvm_context_t kvm)
1345 int ret = 0;
1347 #ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
1348 ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
1349 KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
1350 if (ret <= 0)
1351 ret = 0;
1352 #endif
1353 return ret;
1356 int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
1358 #ifdef KVM_CAP_REINJECT_CONTROL
1359 int r;
1360 struct kvm_reinject_control control;
1362 control.pit_reinject = pit_reinject;
1364 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
1365 if (r > 0) {
1366 r = ioctl(kvm->vm_fd, KVM_REINJECT_CONTROL, &control);
1367 if (r == -1)
1368 return -errno;
1369 return r;
1371 #endif
1372 return -ENOSYS;
1375 int kvm_has_gsi_routing(kvm_context_t kvm)
1377 int r = 0;
1379 #ifdef KVM_CAP_IRQ_ROUTING
1380 r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
1381 #endif
1382 return r;
1385 int kvm_get_gsi_count(kvm_context_t kvm)
1387 #ifdef KVM_CAP_IRQ_ROUTING
1388 return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
1389 #else
1390 return -EINVAL;
1391 #endif
1394 int kvm_clear_gsi_routes(kvm_context_t kvm)
1396 #ifdef KVM_CAP_IRQ_ROUTING
1397 kvm->irq_routes->nr = 0;
1398 return 0;
1399 #else
1400 return -EINVAL;
1401 #endif
1404 int kvm_add_routing_entry(kvm_context_t kvm,
1405 struct kvm_irq_routing_entry* entry)
1407 #ifdef KVM_CAP_IRQ_ROUTING
1408 struct kvm_irq_routing *z;
1409 struct kvm_irq_routing_entry *new;
1410 int n, size;
1412 if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
1413 n = kvm->nr_allocated_irq_routes * 2;
1414 if (n < 64)
1415 n = 64;
1416 size = sizeof(struct kvm_irq_routing);
1417 size += n * sizeof(*new);
1418 z = realloc(kvm->irq_routes, size);
1419 if (!z)
1420 return -ENOMEM;
1421 kvm->nr_allocated_irq_routes = n;
1422 kvm->irq_routes = z;
1424 n = kvm->irq_routes->nr++;
1425 new = &kvm->irq_routes->entries[n];
1426 memset(new, 0, sizeof(*new));
1427 new->gsi = entry->gsi;
1428 new->type = entry->type;
1429 new->flags = entry->flags;
1430 new->u = entry->u;
1432 set_gsi(kvm, entry->gsi);
1434 return 0;
1435 #else
1436 return -ENOSYS;
1437 #endif
1440 int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1442 #ifdef KVM_CAP_IRQ_ROUTING
1443 struct kvm_irq_routing_entry e;
1445 e.gsi = gsi;
1446 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1447 e.flags = 0;
1448 e.u.irqchip.irqchip = irqchip;
1449 e.u.irqchip.pin = pin;
1450 return kvm_add_routing_entry(kvm, &e);
1451 #else
1452 return -ENOSYS;
1453 #endif
1456 int kvm_del_routing_entry(kvm_context_t kvm,
1457 struct kvm_irq_routing_entry* entry)
1459 #ifdef KVM_CAP_IRQ_ROUTING
1460 struct kvm_irq_routing_entry *e, *p;
1461 int i, gsi, found = 0;
1463 gsi = entry->gsi;
1465 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1466 e = &kvm->irq_routes->entries[i];
1467 if (e->type == entry->type
1468 && e->gsi == gsi) {
1469 switch (e->type)
1471 case KVM_IRQ_ROUTING_IRQCHIP: {
1472 if (e->u.irqchip.irqchip ==
1473 entry->u.irqchip.irqchip
1474 && e->u.irqchip.pin ==
1475 entry->u.irqchip.pin) {
1476 p = &kvm->irq_routes->
1477 entries[--kvm->irq_routes->nr];
1478 *e = *p;
1479 found = 1;
1481 break;
1483 case KVM_IRQ_ROUTING_MSI: {
1484 if (e->u.msi.address_lo ==
1485 entry->u.msi.address_lo
1486 && e->u.msi.address_hi ==
1487 entry->u.msi.address_hi
1488 && e->u.msi.data == entry->u.msi.data) {
1489 p = &kvm->irq_routes->
1490 entries[--kvm->irq_routes->nr];
1491 *e = *p;
1492 found = 1;
1494 break;
1496 default:
1497 break;
1499 if (found) {
1500 /* If there are no other users of this GSI
1501 * mark it available in the bitmap */
1502 for (i = 0; i < kvm->irq_routes->nr; i++) {
1503 e = &kvm->irq_routes->entries[i];
1504 if (e->gsi == gsi)
1505 break;
1507 if (i == kvm->irq_routes->nr)
1508 clear_gsi(kvm, gsi);
1510 return 0;
1514 return -ESRCH;
1515 #else
1516 return -ENOSYS;
1517 #endif
1520 int kvm_update_routing_entry(kvm_context_t kvm,
1521 struct kvm_irq_routing_entry* entry,
1522 struct kvm_irq_routing_entry* newentry)
1524 #ifdef KVM_CAP_IRQ_ROUTING
1525 struct kvm_irq_routing_entry *e;
1526 int i;
1528 if (entry->gsi != newentry->gsi ||
1529 entry->type != newentry->type) {
1530 return -EINVAL;
1533 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1534 e = &kvm->irq_routes->entries[i];
1535 if (e->type != entry->type || e->gsi != entry->gsi) {
1536 continue;
1538 switch (e->type) {
1539 case KVM_IRQ_ROUTING_IRQCHIP:
1540 if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
1541 e->u.irqchip.pin == entry->u.irqchip.pin) {
1542 memcpy(&e->u.irqchip, &entry->u.irqchip, sizeof e->u.irqchip);
1543 return 0;
1545 break;
1546 case KVM_IRQ_ROUTING_MSI:
1547 if (e->u.msi.address_lo == entry->u.msi.address_lo &&
1548 e->u.msi.address_hi == entry->u.msi.address_hi &&
1549 e->u.msi.data == entry->u.msi.data) {
1550 memcpy(&e->u.msi, &entry->u.msi, sizeof e->u.msi);
1551 return 0;
1553 break;
1554 default:
1555 break;
1558 return -ESRCH;
1559 #else
1560 return -ENOSYS;
1561 #endif
1564 int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1566 #ifdef KVM_CAP_IRQ_ROUTING
1567 struct kvm_irq_routing_entry e;
1569 e.gsi = gsi;
1570 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1571 e.flags = 0;
1572 e.u.irqchip.irqchip = irqchip;
1573 e.u.irqchip.pin = pin;
1574 return kvm_del_routing_entry(kvm, &e);
1575 #else
1576 return -ENOSYS;
1577 #endif
1580 int kvm_commit_irq_routes(kvm_context_t kvm)
1582 #ifdef KVM_CAP_IRQ_ROUTING
1583 int r;
1585 kvm->irq_routes->flags = 0;
1586 r = ioctl(kvm->vm_fd, KVM_SET_GSI_ROUTING, kvm->irq_routes);
1587 if (r == -1)
1588 r = -errno;
1589 return r;
1590 #else
1591 return -ENOSYS;
1592 #endif
1595 int kvm_get_irq_route_gsi(kvm_context_t kvm)
1597 int i, bit;
1598 uint32_t *buf = kvm->used_gsi_bitmap;
1600 /* Return the lowest unused GSI in the bitmap */
1601 for (i = 0; i < kvm->max_gsi / 32; i++) {
1602 bit = ffs(~buf[i]);
1603 if (!bit)
1604 continue;
1606 return bit - 1 + i * 32;
1609 return -ENOSPC;
1612 #ifdef KVM_CAP_DEVICE_MSIX
1613 int kvm_assign_set_msix_nr(kvm_context_t kvm,
1614 struct kvm_assigned_msix_nr *msix_nr)
1616 int ret;
1618 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
1619 if (ret < 0)
1620 return -errno;
1622 return ret;
1625 int kvm_assign_set_msix_entry(kvm_context_t kvm,
1626 struct kvm_assigned_msix_entry *entry)
1628 int ret;
1630 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
1631 if (ret < 0)
1632 return -errno;
1634 return ret;
1636 #endif
1638 #if defined(KVM_CAP_IRQFD) && defined(CONFIG_eventfd)
1640 #include <sys/eventfd.h>
1642 static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
1644 int r;
1645 struct kvm_irqfd data = {
1646 .fd = fd,
1647 .gsi = gsi,
1648 .flags = flags,
1651 r = ioctl(kvm->vm_fd, KVM_IRQFD, &data);
1652 if (r == -1)
1653 r = -errno;
1654 return r;
1657 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1659 int r;
1660 int fd;
1662 if (!kvm_check_extension(kvm, KVM_CAP_IRQFD))
1663 return -ENOENT;
1665 fd = eventfd(0, 0);
1666 if (fd < 0)
1667 return -errno;
1669 r = _kvm_irqfd(kvm, fd, gsi, 0);
1670 if (r < 0) {
1671 close(fd);
1672 return -errno;
1675 return fd;
1678 #else /* KVM_CAP_IRQFD */
1680 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1682 return -ENOSYS;
1685 #endif /* KVM_CAP_IRQFD */
1686 static inline unsigned long kvm_get_thread_id(void)
1688 return syscall(SYS_gettid);
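/* Wait briefly on cond with qemu_mutex held, then restore cpu_single_env,
 * which another thread may have changed while the mutex was dropped inside
 * the wait. */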
1691 static void qemu_cond_wait(pthread_cond_t *cond)
1693 CPUState *env = cpu_single_env;
1694 static const struct timespec ts = {
1695 .tv_sec = 0,
1696 .tv_nsec = 100000,
1699 pthread_cond_timedwait(cond, &qemu_mutex, &ts);
1700 cpu_single_env = env;
1703 static void sig_ipi_handler(int n)
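/* Run func(data) on the vcpu thread owning env: call it directly if we are
 * already on that thread, otherwise queue a work item, kick the target with
 * SIG_IPI and wait for flush_queued_work() to complete it. */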
1707 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
1709 struct qemu_work_item wi;
1711 if (env == current_env) {
1712 func(data);
1713 return;
1716 wi.func = func;
1717 wi.data = data;
1718 if (!env->kvm_cpu_state.queued_work_first)
1719 env->kvm_cpu_state.queued_work_first = &wi;
1720 else
1721 env->kvm_cpu_state.queued_work_last->next = &wi;
1722 env->kvm_cpu_state.queued_work_last = &wi;
1723 wi.next = NULL;
1724 wi.done = false;
1726 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1727 while (!wi.done)
1728 qemu_cond_wait(&qemu_work_cond);
1731 static void inject_interrupt(void *data)
1733 cpu_interrupt(current_env, (long)data);
1736 void kvm_inject_interrupt(CPUState *env, int mask)
1738 on_vcpu(env, inject_interrupt, (void *)(long)mask);
1741 void kvm_update_interrupt_request(CPUState *env)
1743 int signal = 0;
1745 if (env) {
1746 if (!current_env || !current_env->kvm_cpu_state.created)
1747 signal = 1;
1749 * Testing for created here is really redundant
1751 if (current_env && current_env->kvm_cpu_state.created &&
1752 env != current_env && !env->kvm_cpu_state.signalled)
1753 signal = 1;
1755 if (signal) {
1756 env->kvm_cpu_state.signalled = 1;
1757 if (env->kvm_cpu_state.thread)
1758 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1763 static void kvm_do_load_registers(void *_env)
1765 CPUState *env = _env;
1767 kvm_arch_load_regs(env);
1770 void kvm_load_registers(CPUState *env)
1772 if (kvm_enabled() && qemu_system_ready)
1773 on_vcpu(env, kvm_do_load_registers, env);
1776 static void kvm_do_save_registers(void *_env)
1778 CPUState *env = _env;
1780 kvm_arch_save_regs(env);
1783 void kvm_save_registers(CPUState *env)
1785 if (kvm_enabled())
1786 on_vcpu(env, kvm_do_save_registers, env);
1789 static void kvm_do_load_mpstate(void *_env)
1791 CPUState *env = _env;
1793 kvm_arch_load_mpstate(env);
1796 void kvm_load_mpstate(CPUState *env)
1798 if (kvm_enabled() && qemu_system_ready)
1799 on_vcpu(env, kvm_do_load_mpstate, env);
1802 static void kvm_do_save_mpstate(void *_env)
1804 CPUState *env = _env;
1806 kvm_arch_save_mpstate(env);
1807 env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
1810 void kvm_save_mpstate(CPUState *env)
1812 if (kvm_enabled())
1813 on_vcpu(env, kvm_do_save_mpstate, env);
1816 int kvm_cpu_exec(CPUState *env)
1818 int r;
1820 r = kvm_run(env->kvm_cpu_state.vcpu_ctx, env);
1821 if (r < 0) {
1822 printf("kvm_run returned %d\n", r);
1823 vm_stop(0);
1826 return 0;
1829 static int is_cpu_stopped(CPUState *env)
1831 return !vm_running || env->kvm_cpu_state.stopped;
1834 static void flush_queued_work(CPUState *env)
1836 struct qemu_work_item *wi;
1838 if (!env->kvm_cpu_state.queued_work_first)
1839 return;
1841 while ((wi = env->kvm_cpu_state.queued_work_first)) {
1842 env->kvm_cpu_state.queued_work_first = wi->next;
1843 wi->func(wi->data);
1844 wi->done = true;
1846 env->kvm_cpu_state.queued_work_last = NULL;
1847 pthread_cond_broadcast(&qemu_work_cond);
1850 static void kvm_main_loop_wait(CPUState *env, int timeout)
1852 struct timespec ts;
1853 int r, e;
1854 siginfo_t siginfo;
1855 sigset_t waitset;
1857 pthread_mutex_unlock(&qemu_mutex);
1859 ts.tv_sec = timeout / 1000;
1860 ts.tv_nsec = (timeout % 1000) * 1000000;
1861 sigemptyset(&waitset);
1862 sigaddset(&waitset, SIG_IPI);
1864 r = sigtimedwait(&waitset, &siginfo, &ts);
1865 e = errno;
1867 pthread_mutex_lock(&qemu_mutex);
1869 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
1870 printf("sigtimedwait: %s\n", strerror(e));
1871 exit(1);
1874 cpu_single_env = env;
1875 flush_queued_work(env);
1877 if (env->kvm_cpu_state.stop) {
1878 env->kvm_cpu_state.stop = 0;
1879 env->kvm_cpu_state.stopped = 1;
1880 pthread_cond_signal(&qemu_pause_cond);
1883 env->kvm_cpu_state.signalled = 0;
1886 static int all_threads_paused(void)
1888 CPUState *penv = first_cpu;
1890 while (penv) {
1891 if (penv->kvm_cpu_state.stop)
1892 return 0;
1893 penv = (CPUState *)penv->next_cpu;
1896 return 1;
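/* Ask every vcpu thread to stop: remote threads are signalled with SIG_IPI
 * and report back via qemu_pause_cond, while the calling thread's own vcpu
 * (if any) is simply marked stopped. */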
1899 static void pause_all_threads(void)
1901 CPUState *penv = first_cpu;
1903 while (penv) {
1904 if (penv != cpu_single_env) {
1905 penv->kvm_cpu_state.stop = 1;
1906 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1907 } else {
1908 penv->kvm_cpu_state.stop = 0;
1909 penv->kvm_cpu_state.stopped = 1;
1910 cpu_exit(penv);
1912 penv = (CPUState *)penv->next_cpu;
1915 while (!all_threads_paused())
1916 qemu_cond_wait(&qemu_pause_cond);
1919 static void resume_all_threads(void)
1921 CPUState *penv = first_cpu;
1923 assert(!cpu_single_env);
1925 while (penv) {
1926 penv->kvm_cpu_state.stop = 0;
1927 penv->kvm_cpu_state.stopped = 0;
1928 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1929 penv = (CPUState *)penv->next_cpu;
1933 static void kvm_vm_state_change_handler(void *context, int running, int reason)
1935 if (running)
1936 resume_all_threads();
1937 else
1938 pause_all_threads();
1941 static void setup_kernel_sigmask(CPUState *env)
1943 sigset_t set;
1945 sigemptyset(&set);
1946 sigaddset(&set, SIGUSR2);
1947 sigaddset(&set, SIGIO);
1948 sigaddset(&set, SIGALRM);
1949 sigprocmask(SIG_BLOCK, &set, NULL);
1951 sigprocmask(SIG_BLOCK, NULL, &set);
1952 sigdelset(&set, SIG_IPI);
1954 kvm_set_signal_mask(env->kvm_cpu_state.vcpu_ctx, &set);
1957 static void qemu_kvm_system_reset(void)
1959 CPUState *penv = first_cpu;
1961 pause_all_threads();
1963 qemu_system_reset();
1965 while (penv) {
1966 kvm_arch_cpu_reset(penv);
1967 penv = (CPUState *)penv->next_cpu;
1970 resume_all_threads();
1973 static void process_irqchip_events(CPUState *env)
1975 kvm_arch_process_irqchip_events(env);
1976 if (kvm_arch_has_work(env))
1977 env->halted = 0;
1980 static int kvm_main_loop_cpu(CPUState *env)
1982 setup_kernel_sigmask(env);
1984 pthread_mutex_lock(&qemu_mutex);
1986 kvm_qemu_init_env(env);
1987 #ifdef TARGET_I386
1988 kvm_tpr_vcpu_start(env);
1989 #endif
1991 cpu_single_env = env;
1992 kvm_arch_load_regs(env);
1994 while (1) {
1995 int run_cpu = !is_cpu_stopped(env);
1996 if (run_cpu && !kvm_irqchip_in_kernel(kvm_context)) {
1997 process_irqchip_events(env);
1998 run_cpu = !env->halted;
2000 if (run_cpu) {
2001 kvm_main_loop_wait(env, 0);
2002 kvm_cpu_exec(env);
2003 } else {
2004 kvm_main_loop_wait(env, 1000);
2007 pthread_mutex_unlock(&qemu_mutex);
2008 return 0;
2011 static void *ap_main_loop(void *_env)
2013 CPUState *env = _env;
2014 sigset_t signals;
2015 struct ioperm_data *data = NULL;
2017 current_env = env;
2018 env->thread_id = kvm_get_thread_id();
2019 sigfillset(&signals);
2020 sigprocmask(SIG_BLOCK, &signals, NULL);
2021 env->kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(env, env->cpu_index);
2023 #ifdef USE_KVM_DEVICE_ASSIGNMENT
2024 /* do ioperm for io ports of assigned devices */
2025 LIST_FOREACH(data, &ioperm_head, entries)
2026 on_vcpu(env, kvm_arch_do_ioperm, data);
2027 #endif
2029 /* signal VCPU creation */
2030 pthread_mutex_lock(&qemu_mutex);
2031 current_env->kvm_cpu_state.created = 1;
2032 pthread_cond_signal(&qemu_vcpu_cond);
2034 /* and wait for machine initialization */
2035 while (!qemu_system_ready)
2036 qemu_cond_wait(&qemu_system_cond);
2037 pthread_mutex_unlock(&qemu_mutex);
2039 kvm_main_loop_cpu(env);
2040 return NULL;
2043 void kvm_init_vcpu(CPUState *env)
2045 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
2047 while (env->kvm_cpu_state.created == 0)
2048 qemu_cond_wait(&qemu_vcpu_cond);
2051 int kvm_vcpu_inited(CPUState *env)
2053 return env->kvm_cpu_state.created;
2056 #ifdef TARGET_I386
2057 void kvm_hpet_disable_kpit(void)
2059 struct kvm_pit_state2 ps2;
2061 kvm_get_pit2(kvm_context, &ps2);
2062 ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
2063 kvm_set_pit2(kvm_context, &ps2);
2066 void kvm_hpet_enable_kpit(void)
2068 struct kvm_pit_state2 ps2;
2070 kvm_get_pit2(kvm_context, &ps2);
2071 ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY;
2072 kvm_set_pit2(kvm_context, &ps2);
2074 #endif
2076 int kvm_init_ap(void)
2078 #ifdef TARGET_I386
2079 kvm_tpr_opt_setup();
2080 #endif
2081 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
2083 signal(SIG_IPI, sig_ipi_handler);
2084 return 0;
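/* Wake the io thread out of select() by writing an 8-byte value to the
 * eventfd (or pipe) it polls; eventfd writes must be exactly 8 bytes. */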
2087 void qemu_kvm_notify_work(void)
2089 uint64_t value = 1;
2090 char buffer[8];
2091 size_t offset = 0;
2093 if (io_thread_fd == -1)
2094 return;
2096 memcpy(buffer, &value, sizeof(value));
2098 while (offset < 8) {
2099 ssize_t len;
2101 len = write(io_thread_fd, buffer + offset, 8 - offset);
2102 if (len == -1 && errno == EINTR)
2103 continue;
2105 if (len <= 0)
2106 break;
2108 offset += len;
2111 if (offset != 8)
2112 fprintf(stderr, "failed to notify io thread\n");
2115 /* If we have signalfd, we mask out the signals we want to handle and then
2116 * use signalfd to listen for them. We rely on whatever the current signal
2117 * handler is to dispatch the signals when we receive them.
2120 static void sigfd_handler(void *opaque)
2122 int fd = (unsigned long)opaque;
2123 struct qemu_signalfd_siginfo info;
2124 struct sigaction action;
2125 ssize_t len;
2127 while (1) {
2128 do {
2129 len = read(fd, &info, sizeof(info));
2130 } while (len == -1 && errno == EINTR);
2132 if (len == -1 && errno == EAGAIN)
2133 break;
2135 if (len != sizeof(info)) {
2136 printf("read from sigfd returned %zd: %m\n", len);
2137 return;
2140 sigaction(info.ssi_signo, NULL, &action);
2141 if (action.sa_handler)
2142 action.sa_handler(info.ssi_signo);
2147 /* Used to break IO thread out of select */
2148 static void io_thread_wakeup(void *opaque)
2150 int fd = (unsigned long)opaque;
2151 char buffer[8];
2152 size_t offset = 0;
2154 while (offset < 8) {
2155 ssize_t len;
2157 len = read(fd, buffer + offset, 8 - offset);
2158 if (len == -1 && errno == EINTR)
2159 continue;
2161 if (len <= 0)
2162 break;
2164 offset += len;
2168 int kvm_main_loop(void)
2170 int fds[2];
2171 sigset_t mask;
2172 int sigfd;
2174 io_thread = pthread_self();
2175 qemu_system_ready = 1;
2177 if (qemu_eventfd(fds) == -1) {
2178 fprintf(stderr, "failed to create eventfd\n");
2179 return -errno;
2182 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
2183 (void *)(unsigned long)fds[0]);
2185 io_thread_fd = fds[1];
2187 sigemptyset(&mask);
2188 sigaddset(&mask, SIGIO);
2189 sigaddset(&mask, SIGALRM);
2190 sigprocmask(SIG_BLOCK, &mask, NULL);
2192 sigfd = qemu_signalfd(&mask);
2193 if (sigfd == -1) {
2194 fprintf(stderr, "failed to create signalfd\n");
2195 return -errno;
2198 fcntl(sigfd, F_SETFL, O_NONBLOCK);
2200 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
2201 (void *)(unsigned long)sigfd);
2203 pthread_cond_broadcast(&qemu_system_cond);
2205 io_thread_sigfd = sigfd;
2206 cpu_single_env = NULL;
2208 while (1) {
2209 main_loop_wait(1000);
2210 if (qemu_shutdown_requested()) {
2211 if (qemu_no_shutdown()) {
2212 vm_stop(0);
2213 } else
2214 break;
2215 } else if (qemu_powerdown_requested())
2216 qemu_system_powerdown();
2217 else if (qemu_reset_requested())
2218 qemu_kvm_system_reset();
2219 else if (kvm_debug_cpu_requested) {
2220 gdb_set_stop_cpu(kvm_debug_cpu_requested);
2221 vm_stop(EXCP_DEBUG);
2222 kvm_debug_cpu_requested = NULL;
2226 pause_all_threads();
2227 pthread_mutex_unlock(&qemu_mutex);
2229 return 0;
2232 #ifdef TARGET_I386
2233 static int destroy_region_works = 0;
2234 #endif
2237 #if !defined(TARGET_I386)
2238 int kvm_arch_init_irq_routing(void)
2240 return 0;
2242 #endif
2244 int kvm_qemu_create_context(void)
2246 int r;
2248 if (!kvm_irqchip) {
2249 kvm_disable_irqchip_creation(kvm_context);
2251 if (!kvm_pit) {
2252 kvm_disable_pit_creation(kvm_context);
2254 if (kvm_create(kvm_context, 0, NULL) < 0) {
2255 kvm_finalize(kvm_state);
2256 return -1;
2258 r = kvm_arch_qemu_create_context();
2259 if (r < 0)
2260 kvm_finalize(kvm_state);
2261 if (kvm_pit && !kvm_pit_reinject) {
2262 if (kvm_reinject_control(kvm_context, 0)) {
2263 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
2264 return -1;
2267 #ifdef TARGET_I386
2268 destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
2269 #endif
2271 r = kvm_arch_init_irq_routing();
2272 if (r < 0) {
2273 return r;
2276 return 0;
2279 #ifdef TARGET_I386
2280 static int must_use_aliases_source(target_phys_addr_t addr)
2282 if (destroy_region_works)
2283 return false;
2284 if (addr == 0xa0000 || addr == 0xa8000)
2285 return true;
2286 return false;
2289 static int must_use_aliases_target(target_phys_addr_t addr)
2291 if (destroy_region_works)
2292 return false;
2293 if (addr >= 0xe0000000 && addr < 0x100000000ull)
2294 return true;
2295 return false;
2298 static struct mapping {
2299 target_phys_addr_t phys;
2300 ram_addr_t ram;
2301 ram_addr_t len;
2302 } mappings[50];
2303 static int nr_mappings;
2305 static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
2307 struct mapping *p;
2309 for (p = mappings; p < mappings + nr_mappings; ++p) {
2310 if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
2311 return p;
2314 return NULL;
2317 static struct mapping *find_mapping(target_phys_addr_t start_addr)
2319 struct mapping *p;
2321 for (p = mappings; p < mappings + nr_mappings; ++p) {
2322 if (p->phys <= start_addr && start_addr < p->phys + p->len) {
2323 return p;
2326 return NULL;
2329 static void drop_mapping(target_phys_addr_t start_addr)
2331 struct mapping *p = find_mapping(start_addr);
2333 if (p)
2334 *p = mappings[--nr_mappings];
2336 #endif
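/* Core memory registration hook: translates qemu ram regions into KVM
 * memory slots, unregistering areas that are no longer plain RAM and, on
 * i386 hosts where destroying a memory region is unreliable, falling back
 * to memory aliases for the legacy VGA range. */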
2338 void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
2339 ram_addr_t phys_offset)
2341 int r = 0;
2342 unsigned long area_flags;
2343 #ifdef TARGET_I386
2344 struct mapping *p;
2345 #endif
2347 if (start_addr + size > phys_ram_size) {
2348 phys_ram_size = start_addr + size;
2351 phys_offset &= ~IO_MEM_ROM;
2352 area_flags = phys_offset & ~TARGET_PAGE_MASK;
2354 if (area_flags != IO_MEM_RAM) {
2355 #ifdef TARGET_I386
2356 if (must_use_aliases_source(start_addr)) {
2357 kvm_destroy_memory_alias(kvm_context, start_addr);
2358 return;
2360 if (must_use_aliases_target(start_addr))
2361 return;
2362 #endif
2363 while (size > 0) {
2364 p = find_mapping(start_addr);
2365 if (p) {
2366 kvm_unregister_memory_area(kvm_context, p->phys, p->len);
2367 drop_mapping(p->phys);
2369 start_addr += TARGET_PAGE_SIZE;
2370 if (size > TARGET_PAGE_SIZE) {
2371 size -= TARGET_PAGE_SIZE;
2372 } else {
2373 size = 0;
2376 return;
2379 r = kvm_is_containing_region(kvm_context, start_addr, size);
2380 if (r)
2381 return;
2383 if (area_flags >= TLB_MMIO)
2384 return;
2386 #ifdef TARGET_I386
2387 if (must_use_aliases_source(start_addr)) {
2388 p = find_ram_mapping(phys_offset);
2389 if (p) {
2390 kvm_create_memory_alias(kvm_context, start_addr, size,
2391 p->phys + (phys_offset - p->ram));
2393 return;
2395 #endif
2397 r = kvm_register_phys_mem(kvm_context, start_addr,
2398 qemu_get_ram_ptr(phys_offset),
2399 size, 0);
2400 if (r < 0) {
2401 printf("kvm_cpu_register_physical_memory: failed\n");
2402 exit(1);
2405 #ifdef TARGET_I386
2406 drop_mapping(start_addr);
2407 p = &mappings[nr_mappings++];
2408 p->phys = start_addr;
2409 p->ram = phys_offset;
2410 p->len = size;
2411 #endif
2413 return;
2416 int kvm_setup_guest_memory(void *area, unsigned long size)
2418 int ret = 0;
2420 #ifdef MADV_DONTFORK
2421 if (kvm_enabled() && !kvm_has_sync_mmu())
2422 ret = madvise(area, size, MADV_DONTFORK);
2423 #endif
2425 if (ret)
2426 perror("madvise");
2428 return ret;
2431 int kvm_qemu_check_extension(int ext)
2433 return kvm_check_extension(kvm_context, ext);
2436 int kvm_qemu_init_env(CPUState *cenv)
2438 return kvm_arch_qemu_init_env(cenv);
2441 #ifdef KVM_CAP_SET_GUEST_DEBUG
2442 struct kvm_sw_breakpoint_head kvm_sw_breakpoints =
2443 TAILQ_HEAD_INITIALIZER(kvm_sw_breakpoints);
2445 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(target_ulong pc)
2447 struct kvm_sw_breakpoint *bp;
2449 TAILQ_FOREACH(bp, &kvm_sw_breakpoints, entry) {
2450 if (bp->pc == pc)
2451 return bp;
2453 return NULL;
2456 struct kvm_set_guest_debug_data {
2457 struct kvm_guest_debug dbg;
2458 int err;
2461 static void kvm_invoke_set_guest_debug(void *data)
2463 struct kvm_set_guest_debug_data *dbg_data = data;
2465 dbg_data->err = kvm_set_guest_debug(cpu_single_env->kvm_cpu_state.vcpu_ctx,
2466 &dbg_data->dbg);
2469 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
2471 struct kvm_set_guest_debug_data data;
2473 data.dbg.control = 0;
2474 if (env->singlestep_enabled)
2475 data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
2477 kvm_arch_update_guest_debug(env, &data.dbg);
2478 data.dbg.control |= reinject_trap;
2480 on_vcpu(env, kvm_invoke_set_guest_debug, &data);
2481 return data.err;
2484 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
2485 target_ulong len, int type)
2487 struct kvm_sw_breakpoint *bp;
2488 CPUState *env;
2489 int err;
2491 if (type == GDB_BREAKPOINT_SW) {
2492 bp = kvm_find_sw_breakpoint(addr);
2493 if (bp) {
2494 bp->use_count++;
2495 return 0;
2498 bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
2499 if (!bp)
2500 return -ENOMEM;
2502 bp->pc = addr;
2503 bp->use_count = 1;
2504 err = kvm_arch_insert_sw_breakpoint(current_env, bp);
2505 if (err) {
2506 free(bp);
2507 return err;
2510 TAILQ_INSERT_HEAD(&kvm_sw_breakpoints, bp, entry);
2511 } else {
2512 err = kvm_arch_insert_hw_breakpoint(addr, len, type);
2513 if (err)
2514 return err;
2517 for (env = first_cpu; env != NULL; env = env->next_cpu) {
2518 err = kvm_update_guest_debug(env, 0);
2519 if (err)
2520 return err;
2522 return 0;
2525 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
2526 target_ulong len, int type)
2528 struct kvm_sw_breakpoint *bp;
2529 CPUState *env;
2530 int err;
2532 if (type == GDB_BREAKPOINT_SW) {
2533 bp = kvm_find_sw_breakpoint(addr);
2534 if (!bp)
2535 return -ENOENT;
2537 if (bp->use_count > 1) {
2538 bp->use_count--;
2539 return 0;
2542 err = kvm_arch_remove_sw_breakpoint(current_env, bp);
2543 if (err)
2544 return err;
2546 TAILQ_REMOVE(&kvm_sw_breakpoints, bp, entry);
2547 qemu_free(bp);
2548 } else {
2549 err = kvm_arch_remove_hw_breakpoint(addr, len, type);
2550 if (err)
2551 return err;
2554 for (env = first_cpu; env != NULL; env = env->next_cpu) {
2555 err = kvm_update_guest_debug(env, 0);
2556 if (err)
2557 return err;
2559 return 0;
2562 void kvm_remove_all_breakpoints(CPUState *current_env)
2564 struct kvm_sw_breakpoint *bp, *next;
2565 CPUState *env;
2567 TAILQ_FOREACH_SAFE(bp, &kvm_sw_breakpoints, entry, next) {
2568 if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
2569 /* Try harder to find a CPU that currently sees the breakpoint. */
2570 for (env = first_cpu; env != NULL; env = env->next_cpu) {
2571 if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
2572 break;
2576 kvm_arch_remove_all_hw_breakpoints();
2578 for (env = first_cpu; env != NULL; env = env->next_cpu)
2579 kvm_update_guest_debug(env, 0);
2582 #else /* !KVM_CAP_SET_GUEST_DEBUG */
2584 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
2586 return -EINVAL;
2589 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
2590 target_ulong len, int type)
2592 return -EINVAL;
2595 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
2596 target_ulong len, int type)
2598 return -EINVAL;
2601 void kvm_remove_all_breakpoints(CPUState *current_env)
2604 #endif /* !KVM_CAP_SET_GUEST_DEBUG */
2607 * dirty pages logging
2609 /* FIXME: use unsigned long pointer instead of unsigned char */
2610 unsigned char *kvm_dirty_bitmap = NULL;
2611 int kvm_physical_memory_set_dirty_tracking(int enable)
2613 int r = 0;
2615 if (!kvm_enabled())
2616 return 0;
2618 if (enable) {
2619 if (!kvm_dirty_bitmap) {
2620 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
2621 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
2622 if (kvm_dirty_bitmap == NULL) {
2623 perror("Failed to allocate dirty pages bitmap");
2624 r = -1;
2626 else {
2627 r = kvm_dirty_pages_log_enable_all(kvm_context);
2631 else {
2632 if (kvm_dirty_bitmap) {
2633 r = kvm_dirty_pages_log_reset(kvm_context);
2634 qemu_free(kvm_dirty_bitmap);
2635 kvm_dirty_bitmap = NULL;
2638 return r;
2641 /* get kvm's dirty pages bitmap and update qemu's */
2642 static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
2643 unsigned char *bitmap,
2644 unsigned long offset,
2645 unsigned long mem_size)
2647 unsigned int i, j, n=0;
2648 unsigned char c;
2649 unsigned long page_number, addr, addr1;
2650 ram_addr_t ram_addr;
2651 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
2654 * bitmap-traveling is faster than memory-traveling (for addr...)
2655 * especially when most of the memory is not dirty.
2657 for (i=0; i<len; i++) {
2658 c = bitmap[i];
2659 while (c>0) {
2660 j = ffsl(c) - 1;
2661 c &= ~(1u<<j);
2662 page_number = i * 8 + j;
2663 addr1 = page_number * TARGET_PAGE_SIZE;
2664 addr = offset + addr1;
2665 ram_addr = cpu_get_physical_page_desc(addr);
2666 cpu_physical_memory_set_dirty(ram_addr);
2667 n++;
2670 return 0;
2672 static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
2673 void *bitmap, void *opaque)
2675 return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
2679 * get kvm's dirty pages bitmap and update qemu's
2680 * we only care about physical ram, which resides in slots 0 and 3
2682 int kvm_update_dirty_pages_log(void)
2684 int r = 0;
2687 r = kvm_get_dirty_pages_range(kvm_context, 0, -1UL,
2688 NULL,
2689 kvm_get_dirty_bitmap_cb);
2690 return r;
2693 void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
2694 int log)
2696 if (log)
2697 kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
2698 else {
2699 #ifdef TARGET_I386
2700 if (must_use_aliases_target(start))
2701 return;
2702 #endif
2703 kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
2707 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
2709 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
2710 unsigned int brsize = BITMAP_SIZE(ram_size);
2711 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
2712 unsigned int extra_bytes = (extra_pages +7)/8;
2713 unsigned int hole_start = BITMAP_SIZE(0xa0000);
2714 unsigned int hole_end = BITMAP_SIZE(0xc0000);
2716 memset(bitmap, 0xFF, brsize + extra_bytes);
2717 memset(bitmap + hole_start, 0, hole_end - hole_start);
2718 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
2720 return 0;
2723 #ifdef KVM_CAP_IRQCHIP
2725 int kvm_set_irq(int irq, int level, int *status)
2727 return kvm_set_irq_level(kvm_context, irq, level, status);
2730 #endif
2732 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
2734 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
2737 void kvm_mutex_unlock(void)
2739 assert(!cpu_single_env);
2740 pthread_mutex_unlock(&qemu_mutex);
2743 void kvm_mutex_lock(void)
2745 pthread_mutex_lock(&qemu_mutex);
2746 cpu_single_env = NULL;
2749 #ifdef USE_KVM_DEVICE_ASSIGNMENT
2750 void kvm_add_ioperm_data(struct ioperm_data *data)
2752 LIST_INSERT_HEAD(&ioperm_head, data, entries);
2755 void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
2757 struct ioperm_data *data;
2759 data = LIST_FIRST(&ioperm_head);
2760 while (data) {
2761 struct ioperm_data *next = LIST_NEXT(data, entries);
2763 if (data->start_port == start_port && data->num == num) {
2764 LIST_REMOVE(data, entries);
2765 qemu_free(data);
2768 data = next;
2772 void kvm_ioperm(CPUState *env, void *data)
2774 if (kvm_enabled() && qemu_system_ready)
2775 on_vcpu(env, kvm_arch_do_ioperm, data);
2778 #endif
2780 int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
2782 #ifndef TARGET_IA64
2784 #ifdef TARGET_I386
2785 if (must_use_aliases_source(start_addr))
2786 return 0;
2787 #endif
2789 kvm_get_dirty_pages_range(kvm_context, start_addr, end_addr - start_addr,
2790 NULL, kvm_get_dirty_bitmap_cb);
2791 #endif
2792 return 0;
2795 int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
2797 #ifdef TARGET_I386
2798 if (must_use_aliases_source(phys_addr))
2799 return 0;
2800 #endif
2802 #ifndef TARGET_IA64
2803 kvm_qemu_log_memory(phys_addr, len, 1);
2804 #endif
2805 return 0;
2808 int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
2810 #ifdef TARGET_I386
2811 if (must_use_aliases_source(phys_addr))
2812 return 0;
2813 #endif
2815 #ifndef TARGET_IA64
2816 kvm_qemu_log_memory(phys_addr, len, 0);
2817 #endif
2818 return 0;
2821 void qemu_kvm_cpu_stop(CPUState *env)
2823 if (kvm_enabled())
2824 env->kvm_cpu_state.stopped = 1;
2827 int kvm_set_boot_cpu_id(uint32_t id)
2829 return kvm_set_boot_vcpu_id(kvm_context, id);