remove env->exit_request usage from qemu-kvm.c
[qemu-kvm/amd-iommu.git] / qemu-kvm.c
blob edd400e60398dc4d766c153715d9a4ccc76a8787
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
21 #include "qemu-kvm.h"
22 #include "libkvm.h"
24 #include <pthread.h>
25 #include <sys/utsname.h>
26 #include <sys/syscall.h>
27 #include <sys/mman.h>
28 #include <sys/ioctl.h>
29 #include <signal.h>
31 #define false 0
32 #define true 1
34 #define EXPECTED_KVM_API_VERSION 12
36 #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
37 #error libkvm: userspace and kernel version mismatch
38 #endif
40 int kvm_allowed = 1;
41 int kvm_irqchip = 1;
42 int kvm_pit = 1;
43 int kvm_pit_reinject = 1;
44 int kvm_nested = 0;
47 KVMState *kvm_state;
48 kvm_context_t kvm_context;
50 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
51 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
52 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
53 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
54 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
55 __thread CPUState *current_env;
57 static int qemu_system_ready;
59 #define SIG_IPI (SIGRTMIN+4)
61 pthread_t io_thread;
62 static int io_thread_fd = -1;
63 static int io_thread_sigfd = -1;
65 static CPUState *kvm_debug_cpu_requested;
67 static uint64_t phys_ram_size;
69 /* The list of ioperm_data */
70 static LIST_HEAD(, ioperm_data) ioperm_head;
72 //#define DEBUG_MEMREG
73 #ifdef DEBUG_MEMREG
74 #define DPRINTF(fmt, args...) \
75 do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0)
76 #else
77 #define DPRINTF(fmt, args...) do {} while (0)
78 #endif
80 #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
82 int kvm_abi = EXPECTED_KVM_API_VERSION;
83 int kvm_page_size;
85 #ifdef KVM_CAP_SET_GUEST_DEBUG
86 static int kvm_debug(void *opaque, void *data,
87 struct kvm_debug_exit_arch *arch_info)
89 int handle = kvm_arch_debug(arch_info);
90 CPUState *env = data;
92 if (handle) {
93 kvm_debug_cpu_requested = env;
94 env->stopped = 1;
96 return handle;
98 #endif
100 #define PM_IO_BASE 0xb000
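/*
 * Port 0xb2 is the APM/SMI command port.  Writes of 0xf0/0xf1 clear/set
 * bit 0 of the PM control register at PM_IO_BASE + 4; any other port is
 * simply forwarded to the regular cpu_outb() path below.
 */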
102 static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
104 if (addr == 0xb2) {
105 switch (data) {
106 case 0: {
107 cpu_outb(0, 0xb3, 0);
108 break;
110 case 0xf0: {
111 unsigned x;
113 /* enable acpi */
114 x = cpu_inw(0, PM_IO_BASE + 4);
115 x &= ~1;
116 cpu_outw(0, PM_IO_BASE + 4, x);
117 break;
119 case 0xf1: {
120 unsigned x;
122 /* enable acpi */
123 x = cpu_inw(0, PM_IO_BASE + 4);
124 x |= 1;
125 cpu_outw(0, PM_IO_BASE + 4, x);
126 break;
128 default:
129 break;
131 return 0;
133 cpu_outb(0, addr, data);
134 return 0;
137 static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
139 cpu_outw(0, addr, data);
140 return 0;
143 static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
145 cpu_outl(0, addr, data);
146 return 0;
149 int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
151 cpu_physical_memory_rw(addr, data, len, 0);
152 return 0;
155 int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
157 cpu_physical_memory_rw(addr, data, len, 1);
158 return 0;
161 static int handle_unhandled(uint64_t reason)
163 fprintf(stderr, "kvm: unhandled exit %"PRIx64"\n", reason);
164 return -EINVAL;
168 static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
170 uint32_t *bitmap = kvm->used_gsi_bitmap;
172 if (gsi < kvm->max_gsi)
173 bitmap[gsi / 32] |= 1U << (gsi % 32);
174 else
175 DPRINTF("Invalid GSI %d\n");
178 static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
180 uint32_t *bitmap = kvm->used_gsi_bitmap;
182 if (gsi < kvm->max_gsi)
183 bitmap[gsi / 32] &= ~(1U << (gsi % 32));
184 else
185 DPRINTF("Invalid GSI %d\n");
188 struct slot_info {
189 unsigned long phys_addr;
190 unsigned long len;
191 unsigned long userspace_addr;
192 unsigned flags;
193 int logging_count;
196 struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
198 static void init_slots(void)
200 int i;
202 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
203 slots[i].len = 0;
206 static int get_free_slot(kvm_context_t kvm)
208 int i;
209 int tss_ext;
211 #if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
212 tss_ext = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
213 #else
214 tss_ext = 0;
215 #endif
218      * on older kernels where the set tss ioctl is not supported we must save
219 * slot 0 to hold the extended memory, as the vmx will use the last 3
220 * pages of this slot.
222 if (tss_ext > 0)
223 i = 0;
224 else
225 i = 1;
227 for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
228 if (!slots[i].len)
229 return i;
230 return -1;
233 static void register_slot(int slot, unsigned long phys_addr, unsigned long len,
234 unsigned long userspace_addr, unsigned flags)
236 slots[slot].phys_addr = phys_addr;
237 slots[slot].len = len;
238 slots[slot].userspace_addr = userspace_addr;
239 slots[slot].flags = flags;
242 static void free_slot(int slot)
244 slots[slot].len = 0;
245 slots[slot].logging_count = 0;
248 static int get_slot(unsigned long phys_addr)
250 int i;
252 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) {
253 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
254 (slots[i].phys_addr + slots[i].len-1) >= phys_addr)
255 return i;
257 return -1;
260 /* Returns -1 if this slot is not totally contained on any other,
261 * and the number of the slot otherwise */
262 static int get_container_slot(uint64_t phys_addr, unsigned long size)
264 int i;
266 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i)
267 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
268 (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
269 return i;
270 return -1;
273 int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size)
275 int slot = get_container_slot(phys_addr, size);
276 if (slot == -1)
277 return 0;
278 return 1;
282 * dirty pages logging control
284 static int kvm_dirty_pages_log_change(kvm_context_t kvm,
285 unsigned long phys_addr,
286 unsigned flags,
287 unsigned mask)
289 int r = -1;
290 int slot = get_slot(phys_addr);
292 if (slot == -1) {
293 fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
294 return 1;
297 flags = (slots[slot].flags & ~mask) | flags;
298 if (flags == slots[slot].flags)
299 return 0;
300 slots[slot].flags = flags;
303 struct kvm_userspace_memory_region mem = {
304 .slot = slot,
305 .memory_size = slots[slot].len,
306 .guest_phys_addr = slots[slot].phys_addr,
307 .userspace_addr = slots[slot].userspace_addr,
308 .flags = slots[slot].flags,
312 DPRINTF("slot %d start %llx len %llx flags %x\n",
313 mem.slot,
314 mem.guest_phys_addr,
315 mem.memory_size,
316 mem.flags);
317 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &mem);
318 if (r < 0)
319 fprintf(stderr, "%s: %m\n", __FUNCTION__);
321 return r;
324 static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
325 int (*change)(kvm_context_t kvm,
326 uint64_t start,
327 uint64_t len))
329 int i, r;
331 for (i=r=0; i<KVM_MAX_NUM_MEM_REGIONS && r==0; i++) {
332 if (slots[i].len)
333 r = change(kvm, slots[i].phys_addr, slots[i].len);
335 return r;
338 int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm,
339 uint64_t phys_addr,
340 uint64_t len)
342 int slot = get_slot(phys_addr);
344 DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len);
345 if (slot == -1) {
346 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
347 return -EINVAL;
350 if (slots[slot].logging_count++)
351 return 0;
353 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
354 KVM_MEM_LOG_DIRTY_PAGES,
355 KVM_MEM_LOG_DIRTY_PAGES);
358 int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm,
359 uint64_t phys_addr,
360 uint64_t len)
362 int slot = get_slot(phys_addr);
364 if (slot == -1) {
365 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
366 return -EINVAL;
369 if (--slots[slot].logging_count)
370 return 0;
372 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
374 KVM_MEM_LOG_DIRTY_PAGES);
378 * Enable dirty page logging for all memory regions
380 int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
382 if (kvm->dirty_pages_log_all)
383 return 0;
384 kvm->dirty_pages_log_all = 1;
385 return kvm_dirty_pages_log_change_all(kvm,
386 kvm_dirty_pages_log_enable_slot);
390 * Enable dirty page logging only for memory regions that were created with
391 * dirty logging enabled (disable for all other memory regions).
393 int kvm_dirty_pages_log_reset(kvm_context_t kvm)
395 if (!kvm->dirty_pages_log_all)
396 return 0;
397 kvm->dirty_pages_log_all = 0;
398 return kvm_dirty_pages_log_change_all(kvm,
399 kvm_dirty_pages_log_disable_slot);
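/*
 * Open /dev/kvm, verify that the kernel speaks EXPECTED_KVM_API_VERSION,
 * allocate the global kvm_state/kvm_context and, if the kernel supports
 * IRQ routing, the used-GSI bitmap.  On success qemu_mutex is taken and
 * held for the caller.
 */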
403 int kvm_init(int smp_cpus)
405 int fd;
406 int r, gsi_count;
409 fd = open("/dev/kvm", O_RDWR);
410 if (fd == -1) {
411 perror("open /dev/kvm");
412 return -1;
414 r = ioctl(fd, KVM_GET_API_VERSION, 0);
415 if (r == -1) {
416 fprintf(stderr, "kvm kernel version too old: "
417 "KVM_GET_API_VERSION ioctl not supported\n");
418 goto out_close;
420 if (r < EXPECTED_KVM_API_VERSION) {
421 fprintf(stderr, "kvm kernel version too old: "
422 "We expect API version %d or newer, but got "
423 "version %d\n",
424 EXPECTED_KVM_API_VERSION, r);
425 goto out_close;
427 if (r > EXPECTED_KVM_API_VERSION) {
428 fprintf(stderr, "kvm userspace version too old\n");
429 goto out_close;
431 kvm_abi = r;
432 kvm_page_size = getpagesize();
433 kvm_state = qemu_mallocz(sizeof(*kvm_state));
434 kvm_context = &kvm_state->kvm_context;
436 kvm_state->fd = fd;
437 kvm_state->vmfd = -1;
438 kvm_context->opaque = cpu_single_env;
439 kvm_context->dirty_pages_log_all = 0;
440 kvm_context->no_irqchip_creation = 0;
441 kvm_context->no_pit_creation = 0;
443 #ifdef KVM_CAP_SET_GUEST_DEBUG
444 TAILQ_INIT(&kvm_state->kvm_sw_breakpoints);
445 #endif
447 gsi_count = kvm_get_gsi_count(kvm_context);
448 if (gsi_count > 0) {
449 int gsi_bits, i;
451 /* Round up so we can search ints using ffs */
452 gsi_bits = ALIGN(gsi_count, 32);
453 kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
454 kvm_context->max_gsi = gsi_bits;
456 /* Mark any over-allocated bits as already in use */
457 for (i = gsi_count; i < gsi_bits; i++)
458 set_gsi(kvm_context, i);
461 pthread_mutex_lock(&qemu_mutex);
462 return 0;
464 out_close:
465 close(fd);
466 return -1;
469 static void kvm_finalize(KVMState *s)
471 /* FIXME
472 if (kvm->vcpu_fd[0] != -1)
473 close(kvm->vcpu_fd[0]);
474 if (kvm->vm_fd != -1)
475 close(kvm->vm_fd);
477 close(s->fd);
478 free(s);
481 void kvm_disable_irqchip_creation(kvm_context_t kvm)
483 kvm->no_irqchip_creation = 1;
486 void kvm_disable_pit_creation(kvm_context_t kvm)
488 kvm->no_pit_creation = 1;
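/*
 * Create a vcpu with KVM_CREATE_VCPU and mmap its shared struct kvm_run
 * area.  The returned context carries the vcpu fd that every subsequent
 * per-vcpu ioctl in this file goes through.
 */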
491 kvm_vcpu_context_t kvm_create_vcpu(CPUState *env, int id)
493 long mmap_size;
494 int r;
495 kvm_vcpu_context_t vcpu_ctx = qemu_malloc(sizeof(struct kvm_vcpu_context));
496 kvm_context_t kvm = kvm_context;
498 vcpu_ctx->kvm = kvm;
499 vcpu_ctx->id = id;
501 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
502 if (r < 0) {
503 fprintf(stderr, "kvm_create_vcpu: %m\n");
504 goto err;
506 vcpu_ctx->fd = r;
508 env->kvm_fd = r;
509 env->kvm_state = kvm_state;
511 mmap_size = kvm_ioctl(kvm_state, KVM_GET_VCPU_MMAP_SIZE, 0);
512 if (mmap_size < 0) {
513 fprintf(stderr, "get vcpu mmap size: %m\n");
514 goto err_fd;
516 vcpu_ctx->run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED,
517 vcpu_ctx->fd, 0);
518 if (vcpu_ctx->run == MAP_FAILED) {
519 fprintf(stderr, "mmap vcpu area: %m\n");
520 goto err_fd;
522 return vcpu_ctx;
523 err_fd:
524 close(vcpu_ctx->fd);
525 err:
526 free(vcpu_ctx);
527 return NULL;
530 static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
532 #ifdef KVM_CAP_SET_BOOT_CPU_ID
533 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
534 if (r > 0)
535 return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id);
536 return -ENOSYS;
537 #else
538 return -ENOSYS;
539 #endif
542 int kvm_create_vm(kvm_context_t kvm)
544 int fd;
545 #ifdef KVM_CAP_IRQ_ROUTING
546 kvm->irq_routes = qemu_mallocz(sizeof(*kvm->irq_routes));
547 kvm->nr_allocated_irq_routes = 0;
548 #endif
550 fd = kvm_ioctl(kvm_state, KVM_CREATE_VM, 0);
551 if (fd < 0) {
552 fprintf(stderr, "kvm_create_vm: %m\n");
553 return -1;
555 kvm_state->vmfd = fd;
556 return 0;
559 static int kvm_create_default_phys_mem(kvm_context_t kvm,
560 unsigned long phys_mem_bytes,
561 void **vm_mem)
563 #ifdef KVM_CAP_USER_MEMORY
564 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
565 if (r > 0)
566 return 0;
567 fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
568 #else
569 #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
570 #endif
571 return -1;
574 void kvm_create_irqchip(kvm_context_t kvm)
576 int r;
578 kvm->irqchip_in_kernel = 0;
579 #ifdef KVM_CAP_IRQCHIP
580 if (!kvm->no_irqchip_creation) {
581 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
582 if (r > 0) { /* kernel irqchip supported */
583 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_IRQCHIP);
584 if (r >= 0) {
585 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
586 #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
587 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
588 KVM_CAP_IRQ_INJECT_STATUS);
589 if (r > 0)
590 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
591 #endif
592 kvm->irqchip_in_kernel = 1;
594 else
595 fprintf(stderr, "Create kernel PIC irqchip failed\n");
598 #endif
601 int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
603 int r;
605 r = kvm_create_vm(kvm);
606 if (r < 0)
607 return r;
608 r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
609 if (r < 0)
610 return r;
611 init_slots();
612 r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
613 if (r < 0)
614 return r;
615 kvm_create_irqchip(kvm);
617 return 0;
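/*
 * Register a userspace-backed guest physical range via
 * KVM_SET_USER_MEMORY_REGION (optionally with dirty logging) and record
 * it in the local slots[] table.
 */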
621 int kvm_register_phys_mem(kvm_context_t kvm,
622 unsigned long phys_start, void *userspace_addr,
623 unsigned long len, int log)
626 struct kvm_userspace_memory_region memory = {
627 .memory_size = len,
628 .guest_phys_addr = phys_start,
629 .userspace_addr = (unsigned long)(intptr_t)userspace_addr,
630 .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
632 int r;
634 memory.slot = get_free_slot(kvm);
635 DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n",
636 memory.guest_phys_addr, memory.memory_size,
637 memory.userspace_addr, memory.slot, memory.flags);
638 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
639 if (r < 0) {
640 fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(-r));
641 return -1;
643 register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
644 memory.userspace_addr, memory.flags);
645 return 0;
649 /* destroy/free a whole slot.
650 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
652 void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
653 unsigned long len)
655 int slot;
656 int r;
657 struct kvm_userspace_memory_region memory = {
658 .memory_size = 0,
659 .guest_phys_addr = phys_start,
660 .userspace_addr = 0,
661 .flags = 0,
664 slot = get_slot(phys_start);
666 if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
667 fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
668 __FUNCTION__, slot);
669 return;
671 if (phys_start != slots[slot].phys_addr) {
672 fprintf(stderr,
673 "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
674 __FUNCTION__, phys_start, slots[slot].phys_addr);
675 phys_start = slots[slot].phys_addr;
678 memory.slot = slot;
679 DPRINTF("slot %d start %llx len %llx flags %x\n",
680 memory.slot,
681 memory.guest_phys_addr,
682 memory.memory_size,
683 memory.flags);
684 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
685 if (r < 0) {
686 fprintf(stderr, "destroy_userspace_phys_mem: %s",
687 strerror(-r));
688 return;
691 free_slot(memory.slot);
694 void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size)
697 int slot = get_container_slot(phys_addr, size);
699 if (slot != -1) {
700 DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size);
701 kvm_destroy_phys_mem(kvm, phys_addr, size);
702 return;
706 static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
708 int r;
709 struct kvm_dirty_log log = {
710 .slot = slot,
713 log.dirty_bitmap = buf;
715 r = kvm_vm_ioctl(kvm_state, ioctl_num, &log);
716 if (r < 0)
717 return r;
718 return 0;
721 int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
723 int slot;
725 slot = get_slot(phys_addr);
726 return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
729 int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
730 unsigned long len, void *opaque,
731 int (*cb)(unsigned long start, unsigned long len,
732 void*bitmap, void *opaque))
734 int i;
735 int r;
736 unsigned long end_addr = phys_addr + len;
737 void *buf;
739 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
740 if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr)
741 && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) {
742 buf = qemu_malloc((slots[i].len / 4096 + 7) / 8 + 2);
743 r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
744 if (r) {
745 qemu_free(buf);
746 return r;
748 r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
749 qemu_free(buf);
750 if (r)
751 return r;
754 return 0;
757 #ifdef KVM_CAP_IRQCHIP
759 int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
761 struct kvm_irq_level event;
762 int r;
764 if (!kvm->irqchip_in_kernel)
765 return 0;
766 event.level = level;
767 event.irq = irq;
768 r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event);
769 if (r < 0)
770 perror("kvm_set_irq_level");
772 if (status) {
773 #ifdef KVM_CAP_IRQ_INJECT_STATUS
774 *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ?
775 1 : event.status;
776 #else
777 *status = 1;
778 #endif
781 return 1;
784 int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
786 int r;
788 if (!kvm->irqchip_in_kernel)
789 return 0;
790 r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
791 if (r < 0) {
792 perror("kvm_get_irqchip\n");
794 return r;
797 int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
799 int r;
801 if (!kvm->irqchip_in_kernel)
802 return 0;
803 r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
804 if (r < 0) {
805 perror("kvm_set_irqchip\n");
807 return r;
810 #endif
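/*
 * KVM_EXIT_IO handler.  The data for the (possibly repeated) port access
 * lives inside the kvm_run page at io.data_offset; each element is
 * forwarded to cpu_in{b,w,l}() or kvm_out{b,w,l}() above.
 */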
812 static int handle_io(kvm_vcpu_context_t vcpu)
814 struct kvm_run *run = vcpu->run;
815 kvm_context_t kvm = vcpu->kvm;
816 uint16_t addr = run->io.port;
817 int r;
818 int i;
819 void *p = (void *)run + run->io.data_offset;
821 for (i = 0; i < run->io.count; ++i) {
822 switch (run->io.direction) {
823 case KVM_EXIT_IO_IN:
824 r = 0;
825 switch (run->io.size) {
826 case 1:
827 *(uint8_t *)p = cpu_inb(kvm->opaque, addr);
828 break;
829 case 2:
830 *(uint16_t *)p = cpu_inw(kvm->opaque, addr);
831 break;
832 case 4:
833 *(uint32_t *)p = cpu_inl(kvm->opaque, addr);
834 break;
835 default:
836 fprintf(stderr, "bad I/O size %d\n", run->io.size);
837 return -EMSGSIZE;
839 break;
840 case KVM_EXIT_IO_OUT:
841 switch (run->io.size) {
842 case 1:
843 r = kvm_outb(kvm->opaque, addr,
844 *(uint8_t *)p);
845 break;
846 case 2:
847 r = kvm_outw(kvm->opaque, addr,
848 *(uint16_t *)p);
849 break;
850 case 4:
851 r = kvm_outl(kvm->opaque, addr,
852 *(uint32_t *)p);
853 break;
854 default:
855 fprintf(stderr, "bad I/O size %d\n", run->io.size);
856 return -EMSGSIZE;
858 break;
859 default:
860 fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
861 return -EPROTO;
864 p += run->io.size;
867 return 0;
870 int handle_debug(kvm_vcpu_context_t vcpu, void *env)
872 #ifdef KVM_CAP_SET_GUEST_DEBUG
873 struct kvm_run *run = vcpu->run;
874 kvm_context_t kvm = vcpu->kvm;
876 return kvm_debug(kvm->opaque, env, &run->debug.arch);
877 #else
878 return 0;
879 #endif
882 int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
884 return ioctl(vcpu->fd, KVM_GET_REGS, regs);
887 int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
889 return ioctl(vcpu->fd, KVM_SET_REGS, regs);
892 int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
894 return ioctl(vcpu->fd, KVM_GET_FPU, fpu);
897 int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
899 return ioctl(vcpu->fd, KVM_SET_FPU, fpu);
902 int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
904 return ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
907 int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
909 return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
912 #ifdef KVM_CAP_MP_STATE
913 int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
915 int r;
917 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
918 if (r > 0)
919 return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state);
920 return -ENOSYS;
923 int kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
925 int r;
927 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
928 if (r > 0)
929 return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
930 return -ENOSYS;
932 #endif
934 static int handle_mmio(kvm_vcpu_context_t vcpu)
936 unsigned long addr = vcpu->run->mmio.phys_addr;
937 kvm_context_t kvm = vcpu->kvm;
938 struct kvm_run *kvm_run = vcpu->run;
939 void *data = kvm_run->mmio.data;
941 /* hack: Red Hat 7.1 generates these weird accesses. */
942 if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
943 return 0;
945 if (kvm_run->mmio.is_write)
946 return kvm_mmio_write(kvm->opaque, addr, data,
947 kvm_run->mmio.len);
948 else
949 return kvm_mmio_read(kvm->opaque, addr, data,
950 kvm_run->mmio.len);
953 int handle_io_window(kvm_context_t kvm)
955 return 1;
958 int handle_halt(kvm_vcpu_context_t vcpu)
960 return kvm_arch_halt(vcpu->kvm->opaque, vcpu);
963 int handle_shutdown(kvm_context_t kvm, CPUState *env)
965 /* stop the current vcpu from going back to guest mode */
966 env->stopped = 1;
968 qemu_system_reset_request();
969 return 1;
972 static inline void push_nmi(kvm_context_t kvm)
974 #ifdef KVM_CAP_USER_NMI
975 kvm_arch_push_nmi(kvm->opaque);
976 #endif /* KVM_CAP_USER_NMI */
979 void post_kvm_run(kvm_context_t kvm, CPUState *env)
981 pthread_mutex_lock(&qemu_mutex);
982 kvm_arch_post_kvm_run(kvm->opaque, env);
985 int pre_kvm_run(kvm_context_t kvm, CPUState *env)
987 kvm_arch_pre_kvm_run(kvm->opaque, env);
989 pthread_mutex_unlock(&qemu_mutex);
990 return 0;
993 int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu)
995 return vcpu->run->if_flag;
998 int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu)
1000 return vcpu->run->ready_for_interrupt_injection;
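/*
 * Inner vcpu loop: push any pending NMI, request an interrupt window if
 * the irqchip is in userspace, drop qemu_mutex around the KVM_RUN ioctl,
 * drain the coalesced-MMIO ring, then dispatch on run->exit_reason.
 * Handlers that return 0 send us straight back into the guest.
 */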
1003 int kvm_run(kvm_vcpu_context_t vcpu, void *env)
1005 int r;
1006 int fd = vcpu->fd;
1007 struct kvm_run *run = vcpu->run;
1008 kvm_context_t kvm = vcpu->kvm;
1010 again:
1011 push_nmi(kvm);
1012 #if !defined(__s390__)
1013 if (!kvm->irqchip_in_kernel)
1014 run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
1015 #endif
1016 r = pre_kvm_run(kvm, env);
1017 if (r)
1018 return r;
1019 r = ioctl(fd, KVM_RUN, 0);
1021 if (r == -1 && errno != EINTR && errno != EAGAIN) {
1022 r = -errno;
1023 post_kvm_run(kvm, env);
1024 fprintf(stderr, "kvm_run: %s\n", strerror(-r));
1025 return r;
1028 post_kvm_run(kvm, env);
1030 #if defined(KVM_CAP_COALESCED_MMIO)
1031 if (kvm->coalesced_mmio) {
1032 struct kvm_coalesced_mmio_ring *ring = (void *)run +
1033 kvm->coalesced_mmio * PAGE_SIZE;
1034 while (ring->first != ring->last) {
1035 kvm_mmio_write(kvm->opaque,
1036 ring->coalesced_mmio[ring->first].phys_addr,
1037 &ring->coalesced_mmio[ring->first].data[0],
1038 ring->coalesced_mmio[ring->first].len);
1039 smp_wmb();
1040 ring->first = (ring->first + 1) %
1041 KVM_COALESCED_MMIO_MAX;
1044 #endif
1046 #if !defined(__s390__)
1047 if (r == -1) {
1048 r = handle_io_window(kvm);
1049 goto more;
1051 #endif
1052 if (1) {
1053 switch (run->exit_reason) {
1054 case KVM_EXIT_UNKNOWN:
1055 r = handle_unhandled(run->hw.hardware_exit_reason);
1056 break;
1057 case KVM_EXIT_FAIL_ENTRY:
1058 r = handle_unhandled(run->fail_entry.hardware_entry_failure_reason);
1059 break;
1060 case KVM_EXIT_EXCEPTION:
1061 fprintf(stderr, "exception %d (%x)\n",
1062 run->ex.exception,
1063 run->ex.error_code);
1064 kvm_show_regs(vcpu);
1065 kvm_show_code(vcpu);
1066 abort();
1067 break;
1068 case KVM_EXIT_IO:
1069 r = handle_io(vcpu);
1070 break;
1071 case KVM_EXIT_DEBUG:
1072 r = handle_debug(vcpu, env);
1073 break;
1074 case KVM_EXIT_MMIO:
1075 r = handle_mmio(vcpu);
1076 break;
1077 case KVM_EXIT_HLT:
1078 r = handle_halt(vcpu);
1079 break;
1080 case KVM_EXIT_IRQ_WINDOW_OPEN:
1081 break;
1082 case KVM_EXIT_SHUTDOWN:
1083 r = handle_shutdown(kvm, env);
1084 break;
1085 #if defined(__s390__)
1086 case KVM_EXIT_S390_SIEIC:
1087 r = kvm_s390_handle_intercept(kvm, vcpu,
1088 run);
1089 break;
1090 case KVM_EXIT_S390_RESET:
1091 r = kvm_s390_handle_reset(kvm, vcpu, run);
1092 break;
1093 #endif
1094 default:
1095 if (kvm_arch_run(vcpu)) {
1096 fprintf(stderr, "unhandled vm exit: 0x%x\n",
1097 run->exit_reason);
1098 kvm_show_regs(vcpu);
1099 abort();
1101 break;
1104 more:
1105 if (!r)
1106 goto again;
1107 return r;
1110 int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq)
1112 struct kvm_interrupt intr;
1114 intr.irq = irq;
1115 return ioctl(vcpu->fd, KVM_INTERRUPT, &intr);
1118 #ifdef KVM_CAP_SET_GUEST_DEBUG
1119 int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg)
1121 return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg);
1123 #endif
1125 int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset)
1127 struct kvm_signal_mask *sigmask;
1128 int r;
1130 if (!sigset) {
1131 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL);
1132 if (r == -1)
1133 r = -errno;
1134 return r;
1136 sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));
1138 sigmask->len = 8;
1139 memcpy(sigmask->sigset, sigset, sizeof(*sigset));
1140 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask);
1141 if (r == -1)
1142 r = -errno;
1143 free(sigmask);
1144 return r;
1147 int kvm_irqchip_in_kernel(kvm_context_t kvm)
1149 return kvm->irqchip_in_kernel;
1152 int kvm_pit_in_kernel(kvm_context_t kvm)
1154 return kvm->pit_in_kernel;
1157 int kvm_has_sync_mmu(void)
1159 int r = 0;
1160 #ifdef KVM_CAP_SYNC_MMU
1161 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
1162 #endif
1163 return r;
1166 int kvm_inject_nmi(kvm_vcpu_context_t vcpu)
1168 #ifdef KVM_CAP_USER_NMI
1169 return ioctl(vcpu->fd, KVM_NMI);
1170 #else
1171 return -ENOSYS;
1172 #endif
1175 int kvm_init_coalesced_mmio(kvm_context_t kvm)
1177 int r = 0;
1178 kvm->coalesced_mmio = 0;
1179 #ifdef KVM_CAP_COALESCED_MMIO
1180 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
1181 if (r > 0) {
1182 kvm->coalesced_mmio = r;
1183 return 0;
1185 #endif
1186 return r;
1189 int kvm_coalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
1191 #ifdef KVM_CAP_COALESCED_MMIO
1192 kvm_context_t kvm = kvm_context;
1193 struct kvm_coalesced_mmio_zone zone;
1194 int r;
1196 if (kvm->coalesced_mmio) {
1198 zone.addr = addr;
1199 zone.size = size;
1201 r = kvm_vm_ioctl(kvm_state, KVM_REGISTER_COALESCED_MMIO, &zone);
1202 if (r < 0) {
1203 perror("kvm_register_coalesced_mmio_zone");
1204 return r;
1206 return 0;
1208 #endif
1209 return -ENOSYS;
1212 int kvm_uncoalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
1214 #ifdef KVM_CAP_COALESCED_MMIO
1215 kvm_context_t kvm = kvm_context;
1216 struct kvm_coalesced_mmio_zone zone;
1217 int r;
1219 if (kvm->coalesced_mmio) {
1221 zone.addr = addr;
1222 zone.size = size;
1224 r = kvm_vm_ioctl(kvm_state, KVM_UNREGISTER_COALESCED_MMIO, &zone);
1225 if (r < 0) {
1226 perror("kvm_unregister_coalesced_mmio_zone");
1227 return r;
1229 DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size);
1230 return 0;
1232 #endif
1233 return -ENOSYS;
1236 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
1237 int kvm_assign_pci_device(kvm_context_t kvm,
1238 struct kvm_assigned_pci_dev *assigned_dev)
1240 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
1243 static int kvm_old_assign_irq(kvm_context_t kvm,
1244 struct kvm_assigned_irq *assigned_irq)
1246 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
1249 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
1250 int kvm_assign_irq(kvm_context_t kvm,
1251 struct kvm_assigned_irq *assigned_irq)
1253 int ret;
1255 ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
1256 if (ret > 0) {
1257 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq);
1260 return kvm_old_assign_irq(kvm, assigned_irq);
1263 int kvm_deassign_irq(kvm_context_t kvm,
1264 struct kvm_assigned_irq *assigned_irq)
1266 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
1268 #else
1269 int kvm_assign_irq(kvm_context_t kvm,
1270 struct kvm_assigned_irq *assigned_irq)
1272 return kvm_old_assign_irq(kvm, assigned_irq);
1274 #endif
1275 #endif
1277 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1278 int kvm_deassign_pci_device(kvm_context_t kvm,
1279 struct kvm_assigned_pci_dev *assigned_dev)
1281 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
1283 #endif
1285 int kvm_destroy_memory_region_works(kvm_context_t kvm)
1287 int ret = 0;
1289 #ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
1290 ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
1291 KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
1292 if (ret <= 0)
1293 ret = 0;
1294 #endif
1295 return ret;
1298 int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
1300 #ifdef KVM_CAP_REINJECT_CONTROL
1301 int r;
1302 struct kvm_reinject_control control;
1304 control.pit_reinject = pit_reinject;
1306 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
1307 if (r > 0) {
1308 return kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control);
1310 #endif
1311 return -ENOSYS;
1314 int kvm_has_gsi_routing(kvm_context_t kvm)
1316 int r = 0;
1318 #ifdef KVM_CAP_IRQ_ROUTING
1319 r = kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
1320 #endif
1321 return r;
1324 int kvm_get_gsi_count(kvm_context_t kvm)
1326 #ifdef KVM_CAP_IRQ_ROUTING
1327 return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
1328 #else
1329 return -EINVAL;
1330 #endif
1333 int kvm_clear_gsi_routes(kvm_context_t kvm)
1335 #ifdef KVM_CAP_IRQ_ROUTING
1336 kvm->irq_routes->nr = 0;
1337 return 0;
1338 #else
1339 return -EINVAL;
1340 #endif
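/*
 * IRQ routing entries are accumulated in kvm->irq_routes (the array is
 * grown on demand) and are only pushed to the kernel when
 * kvm_commit_irq_routes() issues KVM_SET_GSI_ROUTING.
 */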
1343 int kvm_add_routing_entry(kvm_context_t kvm,
1344 struct kvm_irq_routing_entry* entry)
1346 #ifdef KVM_CAP_IRQ_ROUTING
1347 struct kvm_irq_routing *z;
1348 struct kvm_irq_routing_entry *new;
1349 int n, size;
1351 if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
1352 n = kvm->nr_allocated_irq_routes * 2;
1353 if (n < 64)
1354 n = 64;
1355 size = sizeof(struct kvm_irq_routing);
1356 size += n * sizeof(*new);
1357 z = realloc(kvm->irq_routes, size);
1358 if (!z)
1359 return -ENOMEM;
1360 kvm->nr_allocated_irq_routes = n;
1361 kvm->irq_routes = z;
1363 n = kvm->irq_routes->nr++;
1364 new = &kvm->irq_routes->entries[n];
1365 memset(new, 0, sizeof(*new));
1366 new->gsi = entry->gsi;
1367 new->type = entry->type;
1368 new->flags = entry->flags;
1369 new->u = entry->u;
1371 set_gsi(kvm, entry->gsi);
1373 return 0;
1374 #else
1375 return -ENOSYS;
1376 #endif
1379 int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1381 #ifdef KVM_CAP_IRQ_ROUTING
1382 struct kvm_irq_routing_entry e;
1384 e.gsi = gsi;
1385 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1386 e.flags = 0;
1387 e.u.irqchip.irqchip = irqchip;
1388 e.u.irqchip.pin = pin;
1389 return kvm_add_routing_entry(kvm, &e);
1390 #else
1391 return -ENOSYS;
1392 #endif
1395 int kvm_del_routing_entry(kvm_context_t kvm,
1396 struct kvm_irq_routing_entry* entry)
1398 #ifdef KVM_CAP_IRQ_ROUTING
1399 struct kvm_irq_routing_entry *e, *p;
1400 int i, gsi, found = 0;
1402 gsi = entry->gsi;
1404 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1405 e = &kvm->irq_routes->entries[i];
1406 if (e->type == entry->type
1407 && e->gsi == gsi) {
1408 switch (e->type)
1410 case KVM_IRQ_ROUTING_IRQCHIP: {
1411 if (e->u.irqchip.irqchip ==
1412 entry->u.irqchip.irqchip
1413 && e->u.irqchip.pin ==
1414 entry->u.irqchip.pin) {
1415 p = &kvm->irq_routes->
1416 entries[--kvm->irq_routes->nr];
1417 *e = *p;
1418 found = 1;
1420 break;
1422 case KVM_IRQ_ROUTING_MSI: {
1423 if (e->u.msi.address_lo ==
1424 entry->u.msi.address_lo
1425 && e->u.msi.address_hi ==
1426 entry->u.msi.address_hi
1427 && e->u.msi.data == entry->u.msi.data) {
1428 p = &kvm->irq_routes->
1429 entries[--kvm->irq_routes->nr];
1430 *e = *p;
1431 found = 1;
1433 break;
1435 default:
1436 break;
1438 if (found) {
1439 /* If there are no other users of this GSI
1440 * mark it available in the bitmap */
1441 for (i = 0; i < kvm->irq_routes->nr; i++) {
1442 e = &kvm->irq_routes->entries[i];
1443 if (e->gsi == gsi)
1444 break;
1446 if (i == kvm->irq_routes->nr)
1447 clear_gsi(kvm, gsi);
1449 return 0;
1453 return -ESRCH;
1454 #else
1455 return -ENOSYS;
1456 #endif
1459 int kvm_update_routing_entry(kvm_context_t kvm,
1460 struct kvm_irq_routing_entry* entry,
1461 struct kvm_irq_routing_entry* newentry)
1463 #ifdef KVM_CAP_IRQ_ROUTING
1464 struct kvm_irq_routing_entry *e;
1465 int i;
1467 if (entry->gsi != newentry->gsi ||
1468 entry->type != newentry->type) {
1469 return -EINVAL;
1472 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1473 e = &kvm->irq_routes->entries[i];
1474 if (e->type != entry->type || e->gsi != entry->gsi) {
1475 continue;
1477 switch (e->type) {
1478 case KVM_IRQ_ROUTING_IRQCHIP:
1479 if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
1480 e->u.irqchip.pin == entry->u.irqchip.pin) {
1481 memcpy(&e->u.irqchip, &entry->u.irqchip, sizeof e->u.irqchip);
1482 return 0;
1484 break;
1485 case KVM_IRQ_ROUTING_MSI:
1486 if (e->u.msi.address_lo == entry->u.msi.address_lo &&
1487 e->u.msi.address_hi == entry->u.msi.address_hi &&
1488 e->u.msi.data == entry->u.msi.data) {
1489 memcpy(&e->u.msi, &entry->u.msi, sizeof e->u.msi);
1490 return 0;
1492 break;
1493 default:
1494 break;
1497 return -ESRCH;
1498 #else
1499 return -ENOSYS;
1500 #endif
1503 int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1505 #ifdef KVM_CAP_IRQ_ROUTING
1506 struct kvm_irq_routing_entry e;
1508 e.gsi = gsi;
1509 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1510 e.flags = 0;
1511 e.u.irqchip.irqchip = irqchip;
1512 e.u.irqchip.pin = pin;
1513 return kvm_del_routing_entry(kvm, &e);
1514 #else
1515 return -ENOSYS;
1516 #endif
1519 int kvm_commit_irq_routes(kvm_context_t kvm)
1521 #ifdef KVM_CAP_IRQ_ROUTING
1522 kvm->irq_routes->flags = 0;
1523 return kvm_vm_ioctl(kvm_state, KVM_SET_GSI_ROUTING, kvm->irq_routes);
1524 #else
1525 return -ENOSYS;
1526 #endif
1529 int kvm_get_irq_route_gsi(kvm_context_t kvm)
1531 int i, bit;
1532 uint32_t *buf = kvm->used_gsi_bitmap;
1534 /* Return the lowest unused GSI in the bitmap */
1535 for (i = 0; i < kvm->max_gsi / 32; i++) {
1536 bit = ffs(~buf[i]);
1537 if (!bit)
1538 continue;
1540 return bit - 1 + i * 32;
1543 return -ENOSPC;
1546 #ifdef KVM_CAP_DEVICE_MSIX
1547 int kvm_assign_set_msix_nr(kvm_context_t kvm,
1548 struct kvm_assigned_msix_nr *msix_nr)
1550 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
1553 int kvm_assign_set_msix_entry(kvm_context_t kvm,
1554 struct kvm_assigned_msix_entry *entry)
1556 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
1558 #endif
1560 #if defined(KVM_CAP_IRQFD) && defined(CONFIG_eventfd)
1562 #include <sys/eventfd.h>
1564 static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
1566 struct kvm_irqfd data = {
1567 .fd = fd,
1568 .gsi = gsi,
1569 .flags = flags,
1572 return kvm_vm_ioctl(kvm_state, KVM_IRQFD, &data);
1575 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1577 int r;
1578 int fd;
1580 if (!kvm_check_extension(kvm_state, KVM_CAP_IRQFD))
1581 return -ENOENT;
1583 fd = eventfd(0, 0);
1584 if (fd < 0)
1585 return -errno;
1587 r = _kvm_irqfd(kvm, fd, gsi, 0);
1588 if (r < 0) {
1589 close(fd);
1590 return -errno;
1593 return fd;
1596 #else /* KVM_CAP_IRQFD */
1598 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1600 return -ENOSYS;
1603 #endif /* KVM_CAP_IRQFD */
1604 static inline unsigned long kvm_get_thread_id(void)
1606 return syscall(SYS_gettid);
1609 static void qemu_cond_wait(pthread_cond_t *cond)
1611 CPUState *env = cpu_single_env;
1612 static const struct timespec ts = {
1613 .tv_sec = 0,
1614 .tv_nsec = 100000,
1617 pthread_cond_timedwait(cond, &qemu_mutex, &ts);
1618 cpu_single_env = env;
1621 static void sig_ipi_handler(int n)
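/*
 * Run func(data) in the context of env's vcpu thread: directly if we
 * already are that thread, otherwise by queuing a qemu_work_item,
 * kicking the thread with SIG_IPI and waiting on qemu_work_cond until
 * flush_queued_work() has executed it.
 * Typical use: on_vcpu(env, kvm_do_save_registers, env);
 */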
1625 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
1627 struct qemu_work_item wi;
1629 if (env == current_env) {
1630 func(data);
1631 return;
1634 wi.func = func;
1635 wi.data = data;
1636 if (!env->kvm_cpu_state.queued_work_first)
1637 env->kvm_cpu_state.queued_work_first = &wi;
1638 else
1639 env->kvm_cpu_state.queued_work_last->next = &wi;
1640 env->kvm_cpu_state.queued_work_last = &wi;
1641 wi.next = NULL;
1642 wi.done = false;
1644 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1645 while (!wi.done)
1646 qemu_cond_wait(&qemu_work_cond);
1649 static void inject_interrupt(void *data)
1651 cpu_interrupt(current_env, (long)data);
1654 void kvm_inject_interrupt(CPUState *env, int mask)
1656 on_vcpu(env, inject_interrupt, (void *)(long)mask);
1659 void kvm_update_interrupt_request(CPUState *env)
1661 int signal = 0;
1663 if (env) {
1664 if (!current_env || !current_env->created)
1665 signal = 1;
1667 * Testing for created here is really redundant
1669 if (current_env && current_env->created &&
1670 env != current_env && !env->kvm_cpu_state.signalled)
1671 signal = 1;
1673 if (signal) {
1674 env->kvm_cpu_state.signalled = 1;
1675 if (env->kvm_cpu_state.thread)
1676 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1681 static void kvm_do_load_registers(void *_env)
1683 CPUState *env = _env;
1685 kvm_arch_load_regs(env);
1688 void kvm_load_registers(CPUState *env)
1690 if (kvm_enabled() && qemu_system_ready)
1691 on_vcpu(env, kvm_do_load_registers, env);
1694 static void kvm_do_save_registers(void *_env)
1696 CPUState *env = _env;
1698 kvm_arch_save_regs(env);
1701 void kvm_save_registers(CPUState *env)
1703 if (kvm_enabled())
1704 on_vcpu(env, kvm_do_save_registers, env);
1707 static void kvm_do_load_mpstate(void *_env)
1709 CPUState *env = _env;
1711 kvm_arch_load_mpstate(env);
1714 void kvm_load_mpstate(CPUState *env)
1716 if (kvm_enabled() && qemu_system_ready)
1717 on_vcpu(env, kvm_do_load_mpstate, env);
1720 static void kvm_do_save_mpstate(void *_env)
1722 CPUState *env = _env;
1724 kvm_arch_save_mpstate(env);
1725 env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
1728 void kvm_save_mpstate(CPUState *env)
1730 if (kvm_enabled())
1731 on_vcpu(env, kvm_do_save_mpstate, env);
1734 int kvm_cpu_exec(CPUState *env)
1736 int r;
1738 r = kvm_run(env->kvm_cpu_state.vcpu_ctx, env);
1739 if (r < 0) {
1740 printf("kvm_run returned %d\n", r);
1741 vm_stop(0);
1744 return 0;
1747 static int is_cpu_stopped(CPUState *env)
1749 return !vm_running || env->stopped;
1752 static void flush_queued_work(CPUState *env)
1754 struct qemu_work_item *wi;
1756 if (!env->kvm_cpu_state.queued_work_first)
1757 return;
1759 while ((wi = env->kvm_cpu_state.queued_work_first)) {
1760 env->kvm_cpu_state.queued_work_first = wi->next;
1761 wi->func(wi->data);
1762 wi->done = true;
1764 env->kvm_cpu_state.queued_work_last = NULL;
1765 pthread_cond_broadcast(&qemu_work_cond);
1768 static void kvm_main_loop_wait(CPUState *env, int timeout)
1770 struct timespec ts;
1771 int r, e;
1772 siginfo_t siginfo;
1773 sigset_t waitset;
1775 pthread_mutex_unlock(&qemu_mutex);
1777 ts.tv_sec = timeout / 1000;
1778 ts.tv_nsec = (timeout % 1000) * 1000000;
1779 sigemptyset(&waitset);
1780 sigaddset(&waitset, SIG_IPI);
1782 r = sigtimedwait(&waitset, &siginfo, &ts);
1783 e = errno;
1785 pthread_mutex_lock(&qemu_mutex);
1787 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
1788 printf("sigtimedwait: %s\n", strerror(e));
1789 exit(1);
1792 cpu_single_env = env;
1793 flush_queued_work(env);
1795 if (env->stop) {
1796 env->stop = 0;
1797 env->stopped = 1;
1798 pthread_cond_signal(&qemu_pause_cond);
1801 env->kvm_cpu_state.signalled = 0;
1804 static int all_threads_paused(void)
1806 CPUState *penv = first_cpu;
1808 while (penv) {
1809 if (penv->stop)
1810 return 0;
1811 penv = (CPUState *)penv->next_cpu;
1814 return 1;
1817 static void pause_all_threads(void)
1819 CPUState *penv = first_cpu;
1821 while (penv) {
1822 if (penv != cpu_single_env) {
1823 penv->stop = 1;
1824 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1825 } else {
1826 penv->stop = 0;
1827 penv->stopped = 1;
1828 cpu_exit(penv);
1830 penv = (CPUState *)penv->next_cpu;
1833 while (!all_threads_paused())
1834 qemu_cond_wait(&qemu_pause_cond);
1837 static void resume_all_threads(void)
1839 CPUState *penv = first_cpu;
1841 assert(!cpu_single_env);
1843 while (penv) {
1844 penv->stop = 0;
1845 penv->stopped = 0;
1846 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1847 penv = (CPUState *)penv->next_cpu;
1851 static void kvm_vm_state_change_handler(void *context, int running, int reason)
1853 if (running)
1854 resume_all_threads();
1855 else
1856 pause_all_threads();
1859 static void setup_kernel_sigmask(CPUState *env)
1861 sigset_t set;
1863 sigemptyset(&set);
1864 sigaddset(&set, SIGUSR2);
1865 sigaddset(&set, SIGIO);
1866 sigaddset(&set, SIGALRM);
1867 sigprocmask(SIG_BLOCK, &set, NULL);
1869 sigprocmask(SIG_BLOCK, NULL, &set);
1870 sigdelset(&set, SIG_IPI);
1872 kvm_set_signal_mask(env->kvm_cpu_state.vcpu_ctx, &set);
1875 static void qemu_kvm_system_reset(void)
1877 CPUState *penv = first_cpu;
1879 pause_all_threads();
1881 qemu_system_reset();
1883 while (penv) {
1884 kvm_arch_cpu_reset(penv);
1885 penv = (CPUState *)penv->next_cpu;
1888 resume_all_threads();
1891 static void process_irqchip_events(CPUState *env)
1893 kvm_arch_process_irqchip_events(env);
1894 if (kvm_arch_has_work(env))
1895 env->halted = 0;
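/*
 * Body of each vcpu thread: set up the signal mask handed to KVM_RUN,
 * load the initial register state, then alternate between
 * kvm_main_loop_wait() (SIG_IPI, queued work, stop requests) and
 * kvm_cpu_exec() for as long as the vcpu is runnable.
 */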
1898 static int kvm_main_loop_cpu(CPUState *env)
1900 setup_kernel_sigmask(env);
1902 pthread_mutex_lock(&qemu_mutex);
1904 kvm_qemu_init_env(env);
1905 #ifdef TARGET_I386
1906 kvm_tpr_vcpu_start(env);
1907 #endif
1909 cpu_single_env = env;
1910 kvm_arch_load_regs(env);
1912 while (1) {
1913 int run_cpu = !is_cpu_stopped(env);
1914 if (run_cpu && !kvm_irqchip_in_kernel(kvm_context)) {
1915 process_irqchip_events(env);
1916 run_cpu = !env->halted;
1918 if (run_cpu) {
1919 kvm_main_loop_wait(env, 0);
1920 kvm_cpu_exec(env);
1921 } else {
1922 kvm_main_loop_wait(env, 1000);
1925 pthread_mutex_unlock(&qemu_mutex);
1926 return 0;
1929 static void *ap_main_loop(void *_env)
1931 CPUState *env = _env;
1932 sigset_t signals;
1933 struct ioperm_data *data = NULL;
1935 current_env = env;
1936 env->thread_id = kvm_get_thread_id();
1937 sigfillset(&signals);
1938 sigprocmask(SIG_BLOCK, &signals, NULL);
1939 env->kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(env, env->cpu_index);
1941 #ifdef USE_KVM_DEVICE_ASSIGNMENT
1942 /* do ioperm for io ports of assigned devices */
1943 LIST_FOREACH(data, &ioperm_head, entries)
1944 on_vcpu(env, kvm_arch_do_ioperm, data);
1945 #endif
1947 /* signal VCPU creation */
1948 pthread_mutex_lock(&qemu_mutex);
1949 current_env->created = 1;
1950 pthread_cond_signal(&qemu_vcpu_cond);
1952 /* and wait for machine initialization */
1953 while (!qemu_system_ready)
1954 qemu_cond_wait(&qemu_system_cond);
1955 pthread_mutex_unlock(&qemu_mutex);
1957 kvm_main_loop_cpu(env);
1958 return NULL;
1961 void kvm_init_vcpu(CPUState *env)
1963 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
1965 while (env->created == 0)
1966 qemu_cond_wait(&qemu_vcpu_cond);
1969 int kvm_vcpu_inited(CPUState *env)
1971 return env->created;
1974 #ifdef TARGET_I386
1975 void kvm_hpet_disable_kpit(void)
1977 struct kvm_pit_state2 ps2;
1979 kvm_get_pit2(kvm_context, &ps2);
1980 ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
1981 kvm_set_pit2(kvm_context, &ps2);
1984 void kvm_hpet_enable_kpit(void)
1986 struct kvm_pit_state2 ps2;
1988 kvm_get_pit2(kvm_context, &ps2);
1989 ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY;
1990 kvm_set_pit2(kvm_context, &ps2);
1992 #endif
1994 int kvm_init_ap(void)
1996 #ifdef TARGET_I386
1997 kvm_tpr_opt_setup();
1998 #endif
1999 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
2001 signal(SIG_IPI, sig_ipi_handler);
2002 return 0;
2005 void qemu_kvm_notify_work(void)
2007 uint64_t value = 1;
2008 char buffer[8];
2009 size_t offset = 0;
2011 if (io_thread_fd == -1)
2012 return;
2014 memcpy(buffer, &value, sizeof(value));
2016 while (offset < 8) {
2017 ssize_t len;
2019 len = write(io_thread_fd, buffer + offset, 8 - offset);
2020 if (len == -1 && errno == EINTR)
2021 continue;
2023         /* In case we have a pipe, there is no reason to insist on writing
2024 * 8 bytes
2026 if (len == -1 && errno == EAGAIN)
2027 break;
2029 if (len <= 0)
2030 break;
2032 offset += len;
2036 /* If we have signalfd, we mask out the signals we want to handle and then
2037 * use signalfd to listen for them. We rely on whatever the current signal
2038 * handler is to dispatch the signals when we receive them.
2041 static void sigfd_handler(void *opaque)
2043 int fd = (unsigned long)opaque;
2044 struct qemu_signalfd_siginfo info;
2045 struct sigaction action;
2046 ssize_t len;
2048 while (1) {
2049 do {
2050 len = read(fd, &info, sizeof(info));
2051 } while (len == -1 && errno == EINTR);
2053 if (len == -1 && errno == EAGAIN)
2054 break;
2056 if (len != sizeof(info)) {
2057 printf("read from sigfd returned %zd: %m\n", len);
2058 return;
2061 sigaction(info.ssi_signo, NULL, &action);
2062 if (action.sa_handler)
2063 action.sa_handler(info.ssi_signo);
2068 /* Used to break IO thread out of select */
2069 static void io_thread_wakeup(void *opaque)
2071 int fd = (unsigned long)opaque;
2072 char buffer[4096];
2074     /* Drain the pipe (or eventfd) */
2075 while (1) {
2076 ssize_t len;
2078 len = read(fd, buffer, sizeof(buffer));
2079 if (len == -1 && errno == EINTR)
2080 continue;
2082 if (len <= 0)
2083 break;
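/*
 * Main loop of the I/O thread: wire up an eventfd/pipe so other threads
 * can wake it (qemu_kvm_notify_work), route SIGIO/SIGALRM through a
 * signalfd, release the vcpu threads via qemu_system_cond, then service
 * shutdown/powerdown/reset/debug requests around main_loop_wait().
 */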
2087 int kvm_main_loop(void)
2089 int fds[2];
2090 sigset_t mask;
2091 int sigfd;
2093 io_thread = pthread_self();
2094 qemu_system_ready = 1;
2096 if (qemu_eventfd(fds) == -1) {
2097 fprintf(stderr, "failed to create eventfd\n");
2098 return -errno;
2101 fcntl(fds[0], F_SETFL, O_NONBLOCK);
2102 fcntl(fds[1], F_SETFL, O_NONBLOCK);
2104 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
2105 (void *)(unsigned long)fds[0]);
2107 io_thread_fd = fds[1];
2109 sigemptyset(&mask);
2110 sigaddset(&mask, SIGIO);
2111 sigaddset(&mask, SIGALRM);
2112 sigprocmask(SIG_BLOCK, &mask, NULL);
2114 sigfd = qemu_signalfd(&mask);
2115 if (sigfd == -1) {
2116 fprintf(stderr, "failed to create signalfd\n");
2117 return -errno;
2120 fcntl(sigfd, F_SETFL, O_NONBLOCK);
2122 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
2123 (void *)(unsigned long)sigfd);
2125 pthread_cond_broadcast(&qemu_system_cond);
2127 io_thread_sigfd = sigfd;
2128 cpu_single_env = NULL;
2130 while (1) {
2131 main_loop_wait(1000);
2132 if (qemu_shutdown_requested()) {
2133 if (qemu_no_shutdown()) {
2134 vm_stop(0);
2135 } else
2136 break;
2137 } else if (qemu_powerdown_requested())
2138 qemu_system_powerdown();
2139 else if (qemu_reset_requested())
2140 qemu_kvm_system_reset();
2141 else if (kvm_debug_cpu_requested) {
2142 gdb_set_stop_cpu(kvm_debug_cpu_requested);
2143 vm_stop(EXCP_DEBUG);
2144 kvm_debug_cpu_requested = NULL;
2148 pause_all_threads();
2149 pthread_mutex_unlock(&qemu_mutex);
2151 return 0;
2154 #ifdef TARGET_I386
2155 static int destroy_region_works = 0;
2156 #endif
2159 #if !defined(TARGET_I386)
2160 int kvm_arch_init_irq_routing(void)
2162 return 0;
2164 #endif
2166 int kvm_qemu_create_context(void)
2168 int r;
2170 if (!kvm_irqchip) {
2171 kvm_disable_irqchip_creation(kvm_context);
2173 if (!kvm_pit) {
2174 kvm_disable_pit_creation(kvm_context);
2176 if (kvm_create(kvm_context, 0, NULL) < 0) {
2177 kvm_finalize(kvm_state);
2178 return -1;
2180 r = kvm_arch_qemu_create_context();
2181     if (r < 0)
2182 kvm_finalize(kvm_state);
2183 if (kvm_pit && !kvm_pit_reinject) {
2184 if (kvm_reinject_control(kvm_context, 0)) {
2185 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
2186 return -1;
2189 #ifdef TARGET_I386
2190 destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
2191 #endif
2193 r = kvm_arch_init_irq_routing();
2194 if (r < 0) {
2195 return r;
2198 return 0;
2201 #ifdef TARGET_I386
2202 static int must_use_aliases_source(target_phys_addr_t addr)
2204 if (destroy_region_works)
2205 return false;
2206 if (addr == 0xa0000 || addr == 0xa8000)
2207 return true;
2208 return false;
2211 static int must_use_aliases_target(target_phys_addr_t addr)
2213 if (destroy_region_works)
2214 return false;
2215 if (addr >= 0xe0000000 && addr < 0x100000000ull)
2216 return true;
2217 return false;
2220 static struct mapping {
2221 target_phys_addr_t phys;
2222 ram_addr_t ram;
2223 ram_addr_t len;
2224 } mappings[50];
2225 static int nr_mappings;
2227 static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
2229 struct mapping *p;
2231 for (p = mappings; p < mappings + nr_mappings; ++p) {
2232 if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
2233 return p;
2236 return NULL;
2239 static struct mapping *find_mapping(target_phys_addr_t start_addr)
2241 struct mapping *p;
2243 for (p = mappings; p < mappings + nr_mappings; ++p) {
2244 if (p->phys <= start_addr && start_addr < p->phys + p->len) {
2245 return p;
2248 return NULL;
2251 static void drop_mapping(target_phys_addr_t start_addr)
2253 struct mapping *p = find_mapping(start_addr);
2255 if (p)
2256 *p = mappings[--nr_mappings];
2258 #endif
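/*
 * Translate qemu's physical memory registrations into KVM memory slots.
 * Non-RAM areas (ROM/MMIO) tear down any slot covering the range; plain
 * RAM becomes a new user memory slot.  On i386, when the kernel cannot
 * safely destroy regions, the VGA window at 0xa0000/0xa8000 and the high
 * PCI area are handled with memory aliases instead (must_use_aliases_*).
 */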
2260 void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
2261 ram_addr_t phys_offset)
2263 int r = 0;
2264 unsigned long area_flags;
2265 #ifdef TARGET_I386
2266 struct mapping *p;
2267 #endif
2269 if (start_addr + size > phys_ram_size) {
2270 phys_ram_size = start_addr + size;
2273 phys_offset &= ~IO_MEM_ROM;
2274 area_flags = phys_offset & ~TARGET_PAGE_MASK;
2276 if (area_flags != IO_MEM_RAM) {
2277 #ifdef TARGET_I386
2278 if (must_use_aliases_source(start_addr)) {
2279 kvm_destroy_memory_alias(kvm_context, start_addr);
2280 return;
2282 if (must_use_aliases_target(start_addr))
2283 return;
2284 #endif
2285 while (size > 0) {
2286 p = find_mapping(start_addr);
2287 if (p) {
2288 kvm_unregister_memory_area(kvm_context, p->phys, p->len);
2289 drop_mapping(p->phys);
2291 start_addr += TARGET_PAGE_SIZE;
2292 if (size > TARGET_PAGE_SIZE) {
2293 size -= TARGET_PAGE_SIZE;
2294 } else {
2295 size = 0;
2298 return;
2301 r = kvm_is_containing_region(kvm_context, start_addr, size);
2302 if (r)
2303 return;
2305 if (area_flags >= TLB_MMIO)
2306 return;
2308 #ifdef TARGET_I386
2309 if (must_use_aliases_source(start_addr)) {
2310 p = find_ram_mapping(phys_offset);
2311 if (p) {
2312 kvm_create_memory_alias(kvm_context, start_addr, size,
2313 p->phys + (phys_offset - p->ram));
2315 return;
2317 #endif
2319 r = kvm_register_phys_mem(kvm_context, start_addr,
2320 qemu_get_ram_ptr(phys_offset),
2321 size, 0);
2322 if (r < 0) {
2323 printf("kvm_cpu_register_physical_memory: failed\n");
2324 exit(1);
2327 #ifdef TARGET_I386
2328 drop_mapping(start_addr);
2329 p = &mappings[nr_mappings++];
2330 p->phys = start_addr;
2331 p->ram = phys_offset;
2332 p->len = size;
2333 #endif
2335 return;
2338 int kvm_setup_guest_memory(void *area, unsigned long size)
2340 int ret = 0;
2342 #ifdef MADV_DONTFORK
2343 if (kvm_enabled() && !kvm_has_sync_mmu())
2344 ret = madvise(area, size, MADV_DONTFORK);
2345 #endif
2347 if (ret)
2348 perror ("madvise");
2350 return ret;
2353 int kvm_qemu_check_extension(int ext)
2355 return kvm_check_extension(kvm_state, ext);
2358 int kvm_qemu_init_env(CPUState *cenv)
2360 return kvm_arch_qemu_init_env(cenv);
2363 #ifdef KVM_CAP_SET_GUEST_DEBUG
2365 struct kvm_set_guest_debug_data {
2366 struct kvm_guest_debug dbg;
2367 int err;
2370 static void kvm_invoke_set_guest_debug(void *data)
2372 struct kvm_set_guest_debug_data *dbg_data = data;
2374 dbg_data->err = kvm_set_guest_debug(cpu_single_env->kvm_cpu_state.vcpu_ctx,
2375 &dbg_data->dbg);
2378 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
2380 struct kvm_set_guest_debug_data data;
2382 data.dbg.control = 0;
2383 if (env->singlestep_enabled)
2384 data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
2386 kvm_arch_update_guest_debug(env, &data.dbg);
2387 data.dbg.control |= reinject_trap;
2389 on_vcpu(env, kvm_invoke_set_guest_debug, &data);
2390 return data.err;
2393 #endif
2396 * dirty pages logging
2398 /* FIXME: use unsigned long pointer instead of unsigned char */
2399 unsigned char *kvm_dirty_bitmap = NULL;
2400 int kvm_physical_memory_set_dirty_tracking(int enable)
2402 int r = 0;
2404 if (!kvm_enabled())
2405 return 0;
2407 if (enable) {
2408 if (!kvm_dirty_bitmap) {
2409 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
2410 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
2411 if (kvm_dirty_bitmap == NULL) {
2412 perror("Failed to allocate dirty pages bitmap");
2413 r=-1;
2415 else {
2416 r = kvm_dirty_pages_log_enable_all(kvm_context);
2420 else {
2421 if (kvm_dirty_bitmap) {
2422 r = kvm_dirty_pages_log_reset(kvm_context);
2423 qemu_free(kvm_dirty_bitmap);
2424 kvm_dirty_bitmap = NULL;
2427 return r;
2430 /* get kvm's dirty pages bitmap and update qemu's */
2431 static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
2432 unsigned char *bitmap,
2433 unsigned long offset,
2434 unsigned long mem_size)
2436 unsigned int i, j, n=0;
2437 unsigned char c;
2438 unsigned long page_number, addr, addr1;
2439 ram_addr_t ram_addr;
2440 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
2443 * bitmap-traveling is faster than memory-traveling (for addr...)
2444 * especially when most of the memory is not dirty.
2446 for (i=0; i<len; i++) {
2447 c = bitmap[i];
2448 while (c>0) {
2449 j = ffsl(c) - 1;
2450 c &= ~(1u<<j);
2451 page_number = i * 8 + j;
2452 addr1 = page_number * TARGET_PAGE_SIZE;
2453 addr = offset + addr1;
2454 ram_addr = cpu_get_physical_page_desc(addr);
2455 cpu_physical_memory_set_dirty(ram_addr);
2456 n++;
2459 return 0;
2461 static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
2462 void *bitmap, void *opaque)
2464 return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
2468 * get kvm's dirty pages bitmap and update qemu's
2469 * we only care about physical ram, which resides in slots 0 and 3
2471 int kvm_update_dirty_pages_log(void)
2473 int r = 0;
2476 r = kvm_get_dirty_pages_range(kvm_context, 0, -1UL,
2477 NULL,
2478 kvm_get_dirty_bitmap_cb);
2479 return r;
2482 void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
2483 int log)
2485 if (log)
2486 kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
2487 else {
2488 #ifdef TARGET_I386
2489 if (must_use_aliases_target(start))
2490 return;
2491 #endif
2492 kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
2496 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
2498 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
2499 unsigned int brsize = BITMAP_SIZE(ram_size);
2500 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
2501 unsigned int extra_bytes = (extra_pages +7)/8;
2502 unsigned int hole_start = BITMAP_SIZE(0xa0000);
2503 unsigned int hole_end = BITMAP_SIZE(0xc0000);
2505 memset(bitmap, 0xFF, brsize + extra_bytes);
2506 memset(bitmap + hole_start, 0, hole_end - hole_start);
2507 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
2509 return 0;
2512 #ifdef KVM_CAP_IRQCHIP
2514 int kvm_set_irq(int irq, int level, int *status)
2516 return kvm_set_irq_level(kvm_context, irq, level, status);
2519 #endif
2521 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
2523 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
2526 void kvm_mutex_unlock(void)
2528 assert(!cpu_single_env);
2529 pthread_mutex_unlock(&qemu_mutex);
2532 void kvm_mutex_lock(void)
2534 pthread_mutex_lock(&qemu_mutex);
2535 cpu_single_env = NULL;
2538 #ifdef USE_KVM_DEVICE_ASSIGNMENT
2539 void kvm_add_ioperm_data(struct ioperm_data *data)
2541 LIST_INSERT_HEAD(&ioperm_head, data, entries);
2544 void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
2546 struct ioperm_data *data;
2548 data = LIST_FIRST(&ioperm_head);
2549 while (data) {
2550 struct ioperm_data *next = LIST_NEXT(data, entries);
2552 if (data->start_port == start_port && data->num == num) {
2553 LIST_REMOVE(data, entries);
2554 qemu_free(data);
2557 data = next;
2561 void kvm_ioperm(CPUState *env, void *data)
2563 if (kvm_enabled() && qemu_system_ready)
2564 on_vcpu(env, kvm_arch_do_ioperm, data);
2567 #endif
2569 int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
2571 #ifndef TARGET_IA64
2573 #ifdef TARGET_I386
2574 if (must_use_aliases_source(start_addr))
2575 return 0;
2576 #endif
2578 kvm_get_dirty_pages_range(kvm_context, start_addr, end_addr - start_addr,
2579 NULL, kvm_get_dirty_bitmap_cb);
2580 #endif
2581 return 0;
2584 int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
2586 #ifdef TARGET_I386
2587 if (must_use_aliases_source(phys_addr))
2588 return 0;
2589 #endif
2591 #ifndef TARGET_IA64
2592 kvm_qemu_log_memory(phys_addr, len, 1);
2593 #endif
2594 return 0;
2597 int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
2599 #ifdef TARGET_I386
2600 if (must_use_aliases_source(phys_addr))
2601 return 0;
2602 #endif
2604 #ifndef TARGET_IA64
2605 kvm_qemu_log_memory(phys_addr, len, 0);
2606 #endif
2607 return 0;
2610 int kvm_set_boot_cpu_id(uint32_t id)
2612 return kvm_set_boot_vcpu_id(kvm_context, id);
2615 #ifdef TARGET_I386
2616 #ifdef KVM_CAP_MCE
2617 struct kvm_x86_mce_data
2619 CPUState *env;
2620 struct kvm_x86_mce *mce;
2623 static void kvm_do_inject_x86_mce(void *_data)
2625 struct kvm_x86_mce_data *data = _data;
2626 int r;
2628 r = kvm_set_mce(data->env->kvm_cpu_state.vcpu_ctx, data->mce);
2629 if (r < 0)
2630 perror("kvm_set_mce FAILED");
2632 #endif
2634 void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
2635 uint64_t mcg_status, uint64_t addr, uint64_t misc)
2637 #ifdef KVM_CAP_MCE
2638 struct kvm_x86_mce mce = {
2639 .bank = bank,
2640 .status = status,
2641 .mcg_status = mcg_status,
2642 .addr = addr,
2643 .misc = misc,
2645 struct kvm_x86_mce_data data = {
2646 .env = cenv,
2647 .mce = &mce,
2650 on_vcpu(cenv, kvm_do_inject_x86_mce, &data);
2651 #endif
2653 #endif