qemu-kvm: routing table update thinko fix
[qemu-kvm/fedora.git] / qemu-kvm.c
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
21 #include "qemu-kvm.h"
22 #include "libkvm.h"
24 #include <pthread.h>
25 #include <sys/utsname.h>
26 #include <sys/syscall.h>
27 #include <sys/mman.h>
28 #include <sys/ioctl.h>
29 #include <signal.h>
31 #define false 0
32 #define true 1
34 #define EXPECTED_KVM_API_VERSION 12
36 #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
37 #error libkvm: userspace and kernel version mismatch
38 #endif
40 int kvm_allowed = 1;
41 int kvm_irqchip = 1;
42 int kvm_pit = 1;
43 int kvm_pit_reinject = 1;
44 int kvm_nested = 0;
47 KVMState *kvm_state;
48 kvm_context_t kvm_context;
50 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
51 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
52 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
53 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
54 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
55 __thread CPUState *current_env;
57 static int qemu_system_ready;
59 #define SIG_IPI (SIGRTMIN+4)
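/* SIG_IPI: real-time signal used to kick a vcpu thread out of KVM_RUN (it is
 * left unblocked in the mask passed to KVM) or out of sigtimedwait() in
 * kvm_main_loop_wait(), so the thread re-examines its state. */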
61 pthread_t io_thread;
62 static int io_thread_fd = -1;
63 static int io_thread_sigfd = -1;
65 static CPUState *kvm_debug_cpu_requested;
67 static uint64_t phys_ram_size;
69 /* The list of ioperm_data */
70 static LIST_HEAD(, ioperm_data) ioperm_head;
72 //#define DEBUG_MEMREG
73 #ifdef DEBUG_MEMREG
74 #define DPRINTF(fmt, args...) \
75 do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0)
76 #else
77 #define DPRINTF(fmt, args...) do {} while (0)
78 #endif
80 #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
82 int kvm_abi = EXPECTED_KVM_API_VERSION;
83 int kvm_page_size;
85 #ifdef KVM_CAP_SET_GUEST_DEBUG
86 static int kvm_debug(void *opaque, void *data,
87 struct kvm_debug_exit_arch *arch_info)
89 int handle = kvm_arch_debug(arch_info);
90 CPUState *env = data;
92 if (handle) {
93 kvm_debug_cpu_requested = env;
94 env->stopped = 1;
96 return handle;
98 #endif
100 #define PM_IO_BASE 0xb000
102 static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
104 if (addr == 0xb2) {
105 switch (data) {
106 case 0: {
107 cpu_outb(0, 0xb3, 0);
108 break;
110 case 0xf0: {
111 unsigned x;
113 /* enable acpi */
114 x = cpu_inw(0, PM_IO_BASE + 4);
115 x &= ~1;
116 cpu_outw(0, PM_IO_BASE + 4, x);
117 break;
119 case 0xf1: {
120 unsigned x;
122 /* enable acpi */
123 x = cpu_inw(0, PM_IO_BASE + 4);
124 x |= 1;
125 cpu_outw(0, PM_IO_BASE + 4, x);
126 break;
128 default:
129 break;
131 return 0;
133 cpu_outb(0, addr, data);
134 return 0;
137 static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
139 cpu_outw(0, addr, data);
140 return 0;
143 static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
145 cpu_outl(0, addr, data);
146 return 0;
149 int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
151 cpu_physical_memory_rw(addr, data, len, 0);
152 return 0;
155 int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
157 cpu_physical_memory_rw(addr, data, len, 1);
158 return 0;
161 static int handle_unhandled(uint64_t reason)
163 fprintf(stderr, "kvm: unhandled exit %"PRIx64"\n", reason);
164 return -EINVAL;
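/*
 * GSI allocation tracking: used_gsi_bitmap keeps one bit per GSI, packed into
 * 32-bit words so a free entry can be found with ffs().  set_gsi()/clear_gsi()
 * mark a GSI as in use or available; out-of-range GSIs are only reported via
 * DPRINTF.
 */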
168 static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
170 uint32_t *bitmap = kvm->used_gsi_bitmap;
172 if (gsi < kvm->max_gsi)
173 bitmap[gsi / 32] |= 1U << (gsi % 32);
174 else
175 DPRINTF("Invalid GSI %d\n");
178 static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
180 uint32_t *bitmap = kvm->used_gsi_bitmap;
182 if (gsi < kvm->max_gsi)
183 bitmap[gsi / 32] &= ~(1U << (gsi % 32));
184 else
185 DPRINTF("Invalid GSI %d\n");
188 struct slot_info {
189 unsigned long phys_addr;
190 unsigned long len;
191 unsigned long userspace_addr;
192 unsigned flags;
193 int logging_count;
196 struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
198 static void init_slots(void)
200 int i;
202 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
203 slots[i].len = 0;
206 static int get_free_slot(kvm_context_t kvm)
208 int i;
209 int tss_ext;
211 #if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
212 tss_ext = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
213 #else
214 tss_ext = 0;
215 #endif
 218 * on older kernels where the set tss ioctl is not supported we must save
219 * slot 0 to hold the extended memory, as the vmx will use the last 3
220 * pages of this slot.
222 if (tss_ext > 0)
223 i = 0;
224 else
225 i = 1;
227 for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
228 if (!slots[i].len)
229 return i;
230 return -1;
233 static void register_slot(int slot, unsigned long phys_addr, unsigned long len,
234 unsigned long userspace_addr, unsigned flags)
236 slots[slot].phys_addr = phys_addr;
237 slots[slot].len = len;
238 slots[slot].userspace_addr = userspace_addr;
239 slots[slot].flags = flags;
242 static void free_slot(int slot)
244 slots[slot].len = 0;
245 slots[slot].logging_count = 0;
248 static int get_slot(unsigned long phys_addr)
250 int i;
252 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) {
253 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
254 (slots[i].phys_addr + slots[i].len-1) >= phys_addr)
255 return i;
257 return -1;
 260 /* Returns -1 if this slot is not totally contained in any other,
 261 * and the number of the containing slot otherwise */
262 static int get_container_slot(uint64_t phys_addr, unsigned long size)
264 int i;
266 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i)
267 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
268 (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
269 return i;
270 return -1;
273 int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size)
275 int slot = get_container_slot(phys_addr, size);
276 if (slot == -1)
277 return 0;
278 return 1;
282 * dirty pages logging control
284 static int kvm_dirty_pages_log_change(kvm_context_t kvm,
285 unsigned long phys_addr,
286 unsigned flags,
287 unsigned mask)
289 int r = -1;
290 int slot = get_slot(phys_addr);
292 if (slot == -1) {
293 fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
294 return 1;
297 flags = (slots[slot].flags & ~mask) | flags;
298 if (flags == slots[slot].flags)
299 return 0;
300 slots[slot].flags = flags;
303 struct kvm_userspace_memory_region mem = {
304 .slot = slot,
305 .memory_size = slots[slot].len,
306 .guest_phys_addr = slots[slot].phys_addr,
307 .userspace_addr = slots[slot].userspace_addr,
308 .flags = slots[slot].flags,
312 DPRINTF("slot %d start %llx len %llx flags %x\n",
313 mem.slot,
314 mem.guest_phys_addr,
315 mem.memory_size,
316 mem.flags);
317 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &mem);
318 if (r < 0)
319 fprintf(stderr, "%s: %m\n", __FUNCTION__);
321 return r;
324 static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
325 int (*change)(kvm_context_t kvm,
326 uint64_t start,
327 uint64_t len))
329 int i, r;
331 for (i=r=0; i<KVM_MAX_NUM_MEM_REGIONS && r==0; i++) {
332 if (slots[i].len)
333 r = change(kvm, slots[i].phys_addr, slots[i].len);
335 return r;
338 int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm,
339 uint64_t phys_addr,
340 uint64_t len)
342 int slot = get_slot(phys_addr);
344 DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len);
345 if (slot == -1) {
346 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
347 return -EINVAL;
350 if (slots[slot].logging_count++)
351 return 0;
353 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
354 KVM_MEM_LOG_DIRTY_PAGES,
355 KVM_MEM_LOG_DIRTY_PAGES);
358 int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm,
359 uint64_t phys_addr,
360 uint64_t len)
362 int slot = get_slot(phys_addr);
364 if (slot == -1) {
365 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
366 return -EINVAL;
369 if (--slots[slot].logging_count)
370 return 0;
 372 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
 373 0,
 374 KVM_MEM_LOG_DIRTY_PAGES);
378 * Enable dirty page logging for all memory regions
380 int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
382 if (kvm->dirty_pages_log_all)
383 return 0;
384 kvm->dirty_pages_log_all = 1;
385 return kvm_dirty_pages_log_change_all(kvm,
386 kvm_dirty_pages_log_enable_slot);
390 * Enable dirty page logging only for memory regions that were created with
391 * dirty logging enabled (disable for all other memory regions).
393 int kvm_dirty_pages_log_reset(kvm_context_t kvm)
395 if (!kvm->dirty_pages_log_all)
396 return 0;
397 kvm->dirty_pages_log_all = 0;
398 return kvm_dirty_pages_log_change_all(kvm,
399 kvm_dirty_pages_log_disable_slot);
403 static int kvm_create_context(void);
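/*
 * kvm_init(): open /dev/kvm, verify that the kernel speaks the API version
 * this userspace was built against, allocate kvm_state/kvm_context, size the
 * GSI bitmap from the KVM_CAP_IRQ_ROUTING count, and finish initialization in
 * kvm_create_context() with qemu_mutex held.
 */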
405 int kvm_init(int smp_cpus)
407 int fd;
408 int r, gsi_count;
411 fd = open("/dev/kvm", O_RDWR);
412 if (fd == -1) {
413 perror("open /dev/kvm");
414 return -1;
416 r = ioctl(fd, KVM_GET_API_VERSION, 0);
417 if (r == -1) {
418 fprintf(stderr, "kvm kernel version too old: "
419 "KVM_GET_API_VERSION ioctl not supported\n");
420 goto out_close;
422 if (r < EXPECTED_KVM_API_VERSION) {
423 fprintf(stderr, "kvm kernel version too old: "
424 "We expect API version %d or newer, but got "
425 "version %d\n",
426 EXPECTED_KVM_API_VERSION, r);
427 goto out_close;
429 if (r > EXPECTED_KVM_API_VERSION) {
430 fprintf(stderr, "kvm userspace version too old\n");
431 goto out_close;
433 kvm_abi = r;
434 kvm_page_size = getpagesize();
435 kvm_state = qemu_mallocz(sizeof(*kvm_state));
436 kvm_context = &kvm_state->kvm_context;
438 kvm_state->fd = fd;
439 kvm_state->vmfd = -1;
440 kvm_context->opaque = cpu_single_env;
441 kvm_context->dirty_pages_log_all = 0;
442 kvm_context->no_irqchip_creation = 0;
443 kvm_context->no_pit_creation = 0;
445 #ifdef KVM_CAP_SET_GUEST_DEBUG
446 TAILQ_INIT(&kvm_state->kvm_sw_breakpoints);
447 #endif
449 gsi_count = kvm_get_gsi_count(kvm_context);
450 if (gsi_count > 0) {
451 int gsi_bits, i;
453 /* Round up so we can search ints using ffs */
454 gsi_bits = ALIGN(gsi_count, 32);
455 kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
456 kvm_context->max_gsi = gsi_bits;
458 /* Mark any over-allocated bits as already in use */
459 for (i = gsi_count; i < gsi_bits; i++)
460 set_gsi(kvm_context, i);
463 pthread_mutex_lock(&qemu_mutex);
464 return kvm_create_context();
466 out_close:
467 close(fd);
468 return -1;
471 static void kvm_finalize(KVMState *s)
473 /* FIXME
474 if (kvm->vcpu_fd[0] != -1)
475 close(kvm->vcpu_fd[0]);
476 if (kvm->vm_fd != -1)
477 close(kvm->vm_fd);
479 close(s->fd);
480 free(s);
483 void kvm_disable_irqchip_creation(kvm_context_t kvm)
485 kvm->no_irqchip_creation = 1;
488 void kvm_disable_pit_creation(kvm_context_t kvm)
490 kvm->no_pit_creation = 1;
493 kvm_vcpu_context_t kvm_create_vcpu(CPUState *env, int id)
495 long mmap_size;
496 int r;
497 kvm_vcpu_context_t vcpu_ctx = qemu_malloc(sizeof(struct kvm_vcpu_context));
498 kvm_context_t kvm = kvm_context;
500 vcpu_ctx->kvm = kvm;
501 vcpu_ctx->id = id;
503 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
504 if (r < 0) {
505 fprintf(stderr, "kvm_create_vcpu: %m\n");
506 goto err;
508 vcpu_ctx->fd = r;
510 env->kvm_fd = r;
511 env->kvm_state = kvm_state;
513 mmap_size = kvm_ioctl(kvm_state, KVM_GET_VCPU_MMAP_SIZE, 0);
514 if (mmap_size < 0) {
515 fprintf(stderr, "get vcpu mmap size: %m\n");
516 goto err_fd;
518 vcpu_ctx->run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED,
519 vcpu_ctx->fd, 0);
520 if (vcpu_ctx->run == MAP_FAILED) {
521 fprintf(stderr, "mmap vcpu area: %m\n");
522 goto err_fd;
524 return vcpu_ctx;
525 err_fd:
526 close(vcpu_ctx->fd);
527 err:
528 free(vcpu_ctx);
529 return NULL;
532 static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
534 #ifdef KVM_CAP_SET_BOOT_CPU_ID
535 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
536 if (r > 0)
537 return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id);
538 return -ENOSYS;
539 #else
540 return -ENOSYS;
541 #endif
544 int kvm_create_vm(kvm_context_t kvm)
546 int fd;
547 #ifdef KVM_CAP_IRQ_ROUTING
548 kvm->irq_routes = qemu_mallocz(sizeof(*kvm->irq_routes));
549 kvm->nr_allocated_irq_routes = 0;
550 #endif
552 fd = kvm_ioctl(kvm_state, KVM_CREATE_VM, 0);
553 if (fd < 0) {
554 fprintf(stderr, "kvm_create_vm: %m\n");
555 return -1;
557 kvm_state->vmfd = fd;
558 return 0;
561 static int kvm_create_default_phys_mem(kvm_context_t kvm,
562 unsigned long phys_mem_bytes,
563 void **vm_mem)
565 #ifdef KVM_CAP_USER_MEMORY
566 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
567 if (r > 0)
568 return 0;
569 fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
570 #else
571 #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
572 #endif
573 return -1;
576 void kvm_create_irqchip(kvm_context_t kvm)
578 int r;
580 kvm->irqchip_in_kernel = 0;
581 #ifdef KVM_CAP_IRQCHIP
582 if (!kvm->no_irqchip_creation) {
583 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
584 if (r > 0) { /* kernel irqchip supported */
585 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_IRQCHIP);
586 if (r >= 0) {
587 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
588 #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
589 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
590 KVM_CAP_IRQ_INJECT_STATUS);
591 if (r > 0)
592 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
593 #endif
594 kvm->irqchip_in_kernel = 1;
596 else
597 fprintf(stderr, "Create kernel PIC irqchip failed\n");
600 #endif
603 int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
605 int r;
607 r = kvm_create_vm(kvm);
608 if (r < 0)
609 return r;
610 r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
611 if (r < 0)
612 return r;
613 init_slots();
614 r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
615 if (r < 0)
616 return r;
617 kvm_create_irqchip(kvm);
619 return 0;
623 int kvm_register_phys_mem(kvm_context_t kvm,
624 unsigned long phys_start, void *userspace_addr,
625 unsigned long len, int log)
628 struct kvm_userspace_memory_region memory = {
629 .memory_size = len,
630 .guest_phys_addr = phys_start,
631 .userspace_addr = (unsigned long)(intptr_t)userspace_addr,
632 .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
634 int r;
636 memory.slot = get_free_slot(kvm);
637 DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n",
638 memory.guest_phys_addr, memory.memory_size,
639 memory.userspace_addr, memory.slot, memory.flags);
640 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
641 if (r < 0) {
642 fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(-r));
643 return -1;
645 register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
646 memory.userspace_addr, memory.flags);
647 return 0;
651 /* destroy/free a whole slot.
652 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
654 void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
655 unsigned long len)
657 int slot;
658 int r;
659 struct kvm_userspace_memory_region memory = {
660 .memory_size = 0,
661 .guest_phys_addr = phys_start,
662 .userspace_addr = 0,
663 .flags = 0,
666 slot = get_slot(phys_start);
668 if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
669 fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
670 __FUNCTION__, slot);
671 return;
673 if (phys_start != slots[slot].phys_addr) {
674 fprintf(stderr,
675 "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
676 __FUNCTION__, phys_start, slots[slot].phys_addr);
677 phys_start = slots[slot].phys_addr;
680 memory.slot = slot;
681 DPRINTF("slot %d start %llx len %llx flags %x\n",
682 memory.slot,
683 memory.guest_phys_addr,
684 memory.memory_size,
685 memory.flags);
686 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
687 if (r < 0) {
688 fprintf(stderr, "destroy_userspace_phys_mem: %s",
689 strerror(-r));
690 return;
693 free_slot(memory.slot);
696 void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size)
699 int slot = get_container_slot(phys_addr, size);
701 if (slot != -1) {
702 DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size);
703 kvm_destroy_phys_mem(kvm, phys_addr, size);
704 return;
708 static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
710 int r;
711 struct kvm_dirty_log log = {
712 .slot = slot,
715 log.dirty_bitmap = buf;
717 r = kvm_vm_ioctl(kvm_state, ioctl_num, &log);
718 if (r < 0)
719 return r;
720 return 0;
723 int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
725 int slot;
727 slot = get_slot(phys_addr);
728 return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
731 int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
732 unsigned long len, void *opaque,
733 int (*cb)(unsigned long start, unsigned long len,
734 void*bitmap, void *opaque))
736 int i;
737 int r;
738 unsigned long end_addr = phys_addr + len;
739 void *buf;
741 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
742 if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr)
743 && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) {
744 buf = qemu_malloc((slots[i].len / 4096 + 7) / 8 + 2);
745 r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
746 if (r) {
747 qemu_free(buf);
748 return r;
750 r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
751 qemu_free(buf);
752 if (r)
753 return r;
756 return 0;
759 #ifdef KVM_CAP_IRQCHIP
761 int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
763 struct kvm_irq_level event;
764 int r;
766 if (!kvm->irqchip_in_kernel)
767 return 0;
768 event.level = level;
769 event.irq = irq;
770 r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event);
771 if (r < 0)
772 perror("kvm_set_irq_level");
774 if (status) {
775 #ifdef KVM_CAP_IRQ_INJECT_STATUS
776 *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ?
777 1 : event.status;
778 #else
779 *status = 1;
780 #endif
783 return 1;
786 int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
788 int r;
790 if (!kvm->irqchip_in_kernel)
791 return 0;
792 r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
793 if (r < 0) {
794 perror("kvm_get_irqchip\n");
796 return r;
799 int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
801 int r;
803 if (!kvm->irqchip_in_kernel)
804 return 0;
805 r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
806 if (r < 0) {
807 perror("kvm_set_irqchip\n");
809 return r;
812 #endif
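/*
 * handle_io(): decode a KVM_EXIT_IO vmexit.  The data to transfer lives in the
 * kvm_run area at io.data_offset; loop io.count times, dispatching each
 * 1/2/4-byte element to cpu_in*() for port reads or kvm_out*() for writes.
 */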
814 static int handle_io(kvm_vcpu_context_t vcpu)
816 struct kvm_run *run = vcpu->run;
817 kvm_context_t kvm = vcpu->kvm;
818 uint16_t addr = run->io.port;
819 int r;
820 int i;
821 void *p = (void *)run + run->io.data_offset;
823 for (i = 0; i < run->io.count; ++i) {
824 switch (run->io.direction) {
825 case KVM_EXIT_IO_IN:
826 r = 0;
827 switch (run->io.size) {
828 case 1:
829 *(uint8_t *)p = cpu_inb(kvm->opaque, addr);
830 break;
831 case 2:
832 *(uint16_t *)p = cpu_inw(kvm->opaque, addr);
833 break;
834 case 4:
835 *(uint32_t *)p = cpu_inl(kvm->opaque, addr);
836 break;
837 default:
838 fprintf(stderr, "bad I/O size %d\n", run->io.size);
839 return -EMSGSIZE;
841 break;
842 case KVM_EXIT_IO_OUT:
843 switch (run->io.size) {
844 case 1:
845 r = kvm_outb(kvm->opaque, addr,
846 *(uint8_t *)p);
847 break;
848 case 2:
849 r = kvm_outw(kvm->opaque, addr,
850 *(uint16_t *)p);
851 break;
852 case 4:
853 r = kvm_outl(kvm->opaque, addr,
854 *(uint32_t *)p);
855 break;
856 default:
857 fprintf(stderr, "bad I/O size %d\n", run->io.size);
858 return -EMSGSIZE;
860 break;
861 default:
862 fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
863 return -EPROTO;
866 p += run->io.size;
869 return 0;
872 int handle_debug(kvm_vcpu_context_t vcpu, void *env)
874 #ifdef KVM_CAP_SET_GUEST_DEBUG
875 struct kvm_run *run = vcpu->run;
876 kvm_context_t kvm = vcpu->kvm;
878 return kvm_debug(kvm->opaque, env, &run->debug.arch);
879 #else
880 return 0;
881 #endif
884 int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
886 return ioctl(vcpu->fd, KVM_GET_REGS, regs);
889 int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
891 return ioctl(vcpu->fd, KVM_SET_REGS, regs);
894 int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
896 return ioctl(vcpu->fd, KVM_GET_FPU, fpu);
899 int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
901 return ioctl(vcpu->fd, KVM_SET_FPU, fpu);
904 int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
906 return ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
909 int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
911 return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
914 #ifdef KVM_CAP_MP_STATE
915 int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
917 int r;
919 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
920 if (r > 0)
921 return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state);
922 return -ENOSYS;
925 int kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
927 int r;
929 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
930 if (r > 0)
931 return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
932 return -ENOSYS;
934 #endif
936 static int handle_mmio(kvm_vcpu_context_t vcpu)
938 unsigned long addr = vcpu->run->mmio.phys_addr;
939 kvm_context_t kvm = vcpu->kvm;
940 struct kvm_run *kvm_run = vcpu->run;
941 void *data = kvm_run->mmio.data;
943 /* hack: Red Hat 7.1 generates these weird accesses. */
944 if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
945 return 0;
947 if (kvm_run->mmio.is_write)
948 return kvm_mmio_write(kvm->opaque, addr, data,
949 kvm_run->mmio.len);
950 else
951 return kvm_mmio_read(kvm->opaque, addr, data,
952 kvm_run->mmio.len);
955 int handle_io_window(kvm_context_t kvm)
957 return 1;
960 int handle_halt(kvm_vcpu_context_t vcpu)
962 return kvm_arch_halt(vcpu->kvm->opaque, vcpu);
965 int handle_shutdown(kvm_context_t kvm, CPUState *env)
967 /* stop the current vcpu from going back to guest mode */
968 env->stopped = 1;
970 qemu_system_reset_request();
971 return 1;
974 static inline void push_nmi(kvm_context_t kvm)
976 #ifdef KVM_CAP_USER_NMI
977 kvm_arch_push_nmi(kvm->opaque);
978 #endif /* KVM_CAP_USER_NMI */
981 void post_kvm_run(kvm_context_t kvm, CPUState *env)
983 pthread_mutex_lock(&qemu_mutex);
984 kvm_arch_post_kvm_run(kvm->opaque, env);
987 int pre_kvm_run(kvm_context_t kvm, CPUState *env)
989 kvm_arch_pre_kvm_run(kvm->opaque, env);
991 pthread_mutex_unlock(&qemu_mutex);
992 return 0;
995 int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu)
997 return vcpu->run->if_flag;
1000 int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu)
1002 return vcpu->run->ready_for_interrupt_injection;
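/*
 * kvm_run(): enter the guest with the KVM_RUN ioctl (qemu_mutex is dropped
 * around the ioctl by pre_kvm_run()/post_kvm_run()), drain any pending
 * coalesced MMIO ring entries, then dispatch on run->exit_reason; a handler
 * returning 0 loops straight back into the guest, non-zero returns to the
 * caller.
 */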
1005 int kvm_run(kvm_vcpu_context_t vcpu, void *env)
1007 int r;
1008 int fd = vcpu->fd;
1009 struct kvm_run *run = vcpu->run;
1010 kvm_context_t kvm = vcpu->kvm;
1012 again:
1013 push_nmi(kvm);
1014 #if !defined(__s390__)
1015 if (!kvm->irqchip_in_kernel)
1016 run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
1017 #endif
1018 r = pre_kvm_run(kvm, env);
1019 if (r)
1020 return r;
1021 r = ioctl(fd, KVM_RUN, 0);
1023 if (r == -1 && errno != EINTR && errno != EAGAIN) {
1024 r = -errno;
1025 post_kvm_run(kvm, env);
1026 fprintf(stderr, "kvm_run: %s\n", strerror(-r));
1027 return r;
1030 post_kvm_run(kvm, env);
1032 #if defined(KVM_CAP_COALESCED_MMIO)
1033 if (kvm->coalesced_mmio) {
1034 struct kvm_coalesced_mmio_ring *ring = (void *)run +
1035 kvm->coalesced_mmio * PAGE_SIZE;
1036 while (ring->first != ring->last) {
1037 kvm_mmio_write(kvm->opaque,
1038 ring->coalesced_mmio[ring->first].phys_addr,
1039 &ring->coalesced_mmio[ring->first].data[0],
1040 ring->coalesced_mmio[ring->first].len);
1041 smp_wmb();
1042 ring->first = (ring->first + 1) %
1043 KVM_COALESCED_MMIO_MAX;
1046 #endif
1048 #if !defined(__s390__)
1049 if (r == -1) {
1050 r = handle_io_window(kvm);
1051 goto more;
1053 #endif
1054 if (1) {
1055 switch (run->exit_reason) {
1056 case KVM_EXIT_UNKNOWN:
1057 r = handle_unhandled(run->hw.hardware_exit_reason);
1058 break;
1059 case KVM_EXIT_FAIL_ENTRY:
1060 r = handle_unhandled(run->fail_entry.hardware_entry_failure_reason);
1061 break;
1062 case KVM_EXIT_EXCEPTION:
1063 fprintf(stderr, "exception %d (%x)\n",
1064 run->ex.exception,
1065 run->ex.error_code);
1066 kvm_show_regs(vcpu);
1067 kvm_show_code(vcpu);
1068 abort();
1069 break;
1070 case KVM_EXIT_IO:
1071 r = handle_io(vcpu);
1072 break;
1073 case KVM_EXIT_DEBUG:
1074 r = handle_debug(vcpu, env);
1075 break;
1076 case KVM_EXIT_MMIO:
1077 r = handle_mmio(vcpu);
1078 break;
1079 case KVM_EXIT_HLT:
1080 r = handle_halt(vcpu);
1081 break;
1082 case KVM_EXIT_IRQ_WINDOW_OPEN:
1083 break;
1084 case KVM_EXIT_SHUTDOWN:
1085 r = handle_shutdown(kvm, env);
1086 break;
1087 #if defined(__s390__)
1088 case KVM_EXIT_S390_SIEIC:
1089 r = kvm_s390_handle_intercept(kvm, vcpu,
1090 run);
1091 break;
1092 case KVM_EXIT_S390_RESET:
1093 r = kvm_s390_handle_reset(kvm, vcpu, run);
1094 break;
1095 #endif
1096 default:
1097 if (kvm_arch_run(vcpu)) {
1098 fprintf(stderr, "unhandled vm exit: 0x%x\n",
1099 run->exit_reason);
1100 kvm_show_regs(vcpu);
1101 abort();
1103 break;
1106 more:
1107 if (!r)
1108 goto again;
1109 return r;
1112 int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq)
1114 struct kvm_interrupt intr;
1116 intr.irq = irq;
1117 return ioctl(vcpu->fd, KVM_INTERRUPT, &intr);
1120 #ifdef KVM_CAP_SET_GUEST_DEBUG
1121 int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg)
1123 return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg);
1125 #endif
1127 int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset)
1129 struct kvm_signal_mask *sigmask;
1130 int r;
1132 if (!sigset) {
1133 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL);
1134 if (r == -1)
1135 r = -errno;
1136 return r;
1138 sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));
1140 sigmask->len = 8;
1141 memcpy(sigmask->sigset, sigset, sizeof(*sigset));
1142 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask);
1143 if (r == -1)
1144 r = -errno;
1145 free(sigmask);
1146 return r;
1149 int kvm_irqchip_in_kernel(kvm_context_t kvm)
1151 return kvm->irqchip_in_kernel;
1154 int kvm_pit_in_kernel(kvm_context_t kvm)
1156 return kvm->pit_in_kernel;
1159 int kvm_has_sync_mmu(void)
1161 int r = 0;
1162 #ifdef KVM_CAP_SYNC_MMU
1163 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
1164 #endif
1165 return r;
1168 int kvm_inject_nmi(kvm_vcpu_context_t vcpu)
1170 #ifdef KVM_CAP_USER_NMI
1171 return ioctl(vcpu->fd, KVM_NMI);
1172 #else
1173 return -ENOSYS;
1174 #endif
1177 int kvm_init_coalesced_mmio(kvm_context_t kvm)
1179 int r = 0;
1180 kvm->coalesced_mmio = 0;
1181 #ifdef KVM_CAP_COALESCED_MMIO
1182 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
1183 if (r > 0) {
1184 kvm->coalesced_mmio = r;
1185 return 0;
1187 #endif
1188 return r;
1191 int kvm_coalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
1193 #ifdef KVM_CAP_COALESCED_MMIO
1194 kvm_context_t kvm = kvm_context;
1195 struct kvm_coalesced_mmio_zone zone;
1196 int r;
1198 if (kvm->coalesced_mmio) {
1200 zone.addr = addr;
1201 zone.size = size;
1203 r = kvm_vm_ioctl(kvm_state, KVM_REGISTER_COALESCED_MMIO, &zone);
1204 if (r < 0) {
1205 perror("kvm_register_coalesced_mmio_zone");
1206 return r;
1208 return 0;
1210 #endif
1211 return -ENOSYS;
1214 int kvm_uncoalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
1216 #ifdef KVM_CAP_COALESCED_MMIO
1217 kvm_context_t kvm = kvm_context;
1218 struct kvm_coalesced_mmio_zone zone;
1219 int r;
1221 if (kvm->coalesced_mmio) {
1223 zone.addr = addr;
1224 zone.size = size;
1226 r = kvm_vm_ioctl(kvm_state, KVM_UNREGISTER_COALESCED_MMIO, &zone);
1227 if (r < 0) {
1228 perror("kvm_unregister_coalesced_mmio_zone");
1229 return r;
1231 DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size);
1232 return 0;
1234 #endif
1235 return -ENOSYS;
1238 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
1239 int kvm_assign_pci_device(kvm_context_t kvm,
1240 struct kvm_assigned_pci_dev *assigned_dev)
1242 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
1245 static int kvm_old_assign_irq(kvm_context_t kvm,
1246 struct kvm_assigned_irq *assigned_irq)
1248 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
1251 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
1252 int kvm_assign_irq(kvm_context_t kvm,
1253 struct kvm_assigned_irq *assigned_irq)
1255 int ret;
1257 ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
1258 if (ret > 0) {
1259 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq);
1262 return kvm_old_assign_irq(kvm, assigned_irq);
1265 int kvm_deassign_irq(kvm_context_t kvm,
1266 struct kvm_assigned_irq *assigned_irq)
1268 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
1270 #else
1271 int kvm_assign_irq(kvm_context_t kvm,
1272 struct kvm_assigned_irq *assigned_irq)
1274 return kvm_old_assign_irq(kvm, assigned_irq);
1276 #endif
1277 #endif
1279 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1280 int kvm_deassign_pci_device(kvm_context_t kvm,
1281 struct kvm_assigned_pci_dev *assigned_dev)
1283 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
1285 #endif
1287 int kvm_destroy_memory_region_works(kvm_context_t kvm)
1289 int ret = 0;
1291 #ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
1292 ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
1293 KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
1294 if (ret <= 0)
1295 ret = 0;
1296 #endif
1297 return ret;
1300 int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
1302 #ifdef KVM_CAP_REINJECT_CONTROL
1303 int r;
1304 struct kvm_reinject_control control;
1306 control.pit_reinject = pit_reinject;
1308 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
1309 if (r > 0) {
1310 return kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control);
1312 #endif
1313 return -ENOSYS;
1316 int kvm_has_gsi_routing(kvm_context_t kvm)
1318 int r = 0;
1320 #ifdef KVM_CAP_IRQ_ROUTING
1321 r = kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
1322 #endif
1323 return r;
1326 int kvm_get_gsi_count(kvm_context_t kvm)
1328 #ifdef KVM_CAP_IRQ_ROUTING
1329 return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
1330 #else
1331 return -EINVAL;
1332 #endif
1335 int kvm_clear_gsi_routes(kvm_context_t kvm)
1337 #ifdef KVM_CAP_IRQ_ROUTING
1338 kvm->irq_routes->nr = 0;
1339 return 0;
1340 #else
1341 return -EINVAL;
1342 #endif
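/*
 * kvm_add_routing_entry(): grow the irq_routes table on demand (doubling, with
 * a minimum of 64 entries), append a copy of the new entry and mark its GSI as
 * used in the bitmap.  The table is only pushed to the kernel by
 * kvm_commit_irq_routes().
 */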
1345 int kvm_add_routing_entry(kvm_context_t kvm,
1346 struct kvm_irq_routing_entry* entry)
1348 #ifdef KVM_CAP_IRQ_ROUTING
1349 struct kvm_irq_routing *z;
1350 struct kvm_irq_routing_entry *new;
1351 int n, size;
1353 if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
1354 n = kvm->nr_allocated_irq_routes * 2;
1355 if (n < 64)
1356 n = 64;
1357 size = sizeof(struct kvm_irq_routing);
1358 size += n * sizeof(*new);
1359 z = realloc(kvm->irq_routes, size);
1360 if (!z)
1361 return -ENOMEM;
1362 kvm->nr_allocated_irq_routes = n;
1363 kvm->irq_routes = z;
1365 n = kvm->irq_routes->nr++;
1366 new = &kvm->irq_routes->entries[n];
1367 memset(new, 0, sizeof(*new));
1368 new->gsi = entry->gsi;
1369 new->type = entry->type;
1370 new->flags = entry->flags;
1371 new->u = entry->u;
1373 set_gsi(kvm, entry->gsi);
1375 return 0;
1376 #else
1377 return -ENOSYS;
1378 #endif
1381 int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1383 #ifdef KVM_CAP_IRQ_ROUTING
1384 struct kvm_irq_routing_entry e;
1386 e.gsi = gsi;
1387 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1388 e.flags = 0;
1389 e.u.irqchip.irqchip = irqchip;
1390 e.u.irqchip.pin = pin;
1391 return kvm_add_routing_entry(kvm, &e);
1392 #else
1393 return -ENOSYS;
1394 #endif
1397 int kvm_del_routing_entry(kvm_context_t kvm,
1398 struct kvm_irq_routing_entry* entry)
1400 #ifdef KVM_CAP_IRQ_ROUTING
1401 struct kvm_irq_routing_entry *e, *p;
1402 int i, gsi, found = 0;
1404 gsi = entry->gsi;
1406 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1407 e = &kvm->irq_routes->entries[i];
1408 if (e->type == entry->type
1409 && e->gsi == gsi) {
1410 switch (e->type)
1412 case KVM_IRQ_ROUTING_IRQCHIP: {
1413 if (e->u.irqchip.irqchip ==
1414 entry->u.irqchip.irqchip
1415 && e->u.irqchip.pin ==
1416 entry->u.irqchip.pin) {
1417 p = &kvm->irq_routes->
1418 entries[--kvm->irq_routes->nr];
1419 *e = *p;
1420 found = 1;
1422 break;
1424 case KVM_IRQ_ROUTING_MSI: {
1425 if (e->u.msi.address_lo ==
1426 entry->u.msi.address_lo
1427 && e->u.msi.address_hi ==
1428 entry->u.msi.address_hi
1429 && e->u.msi.data == entry->u.msi.data) {
1430 p = &kvm->irq_routes->
1431 entries[--kvm->irq_routes->nr];
1432 *e = *p;
1433 found = 1;
1435 break;
1437 default:
1438 break;
1440 if (found) {
1441 /* If there are no other users of this GSI
1442 * mark it available in the bitmap */
1443 for (i = 0; i < kvm->irq_routes->nr; i++) {
1444 e = &kvm->irq_routes->entries[i];
1445 if (e->gsi == gsi)
1446 break;
1448 if (i == kvm->irq_routes->nr)
1449 clear_gsi(kvm, gsi);
1451 return 0;
1455 return -ESRCH;
1456 #else
1457 return -ENOSYS;
1458 #endif
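/*
 * kvm_update_routing_entry(): update an existing route in place.  The new
 * entry must keep the same gsi and type as the old one; the table is searched
 * for an entry matching the old irqchip pin or MSI address/data, and only its
 * union payload is replaced with the new values.  Returns -ESRCH if no
 * matching entry exists.
 */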
1461 int kvm_update_routing_entry(kvm_context_t kvm,
1462 struct kvm_irq_routing_entry* entry,
1463 struct kvm_irq_routing_entry* newentry)
1465 #ifdef KVM_CAP_IRQ_ROUTING
1466 struct kvm_irq_routing_entry *e;
1467 int i;
1469 if (entry->gsi != newentry->gsi ||
1470 entry->type != newentry->type) {
1471 return -EINVAL;
1474 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1475 e = &kvm->irq_routes->entries[i];
1476 if (e->type != entry->type || e->gsi != entry->gsi) {
1477 continue;
1479 switch (e->type) {
1480 case KVM_IRQ_ROUTING_IRQCHIP:
1481 if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
1482 e->u.irqchip.pin == entry->u.irqchip.pin) {
1483 memcpy(&e->u.irqchip, &newentry->u.irqchip, sizeof e->u.irqchip);
1484 return 0;
1486 break;
1487 case KVM_IRQ_ROUTING_MSI:
1488 if (e->u.msi.address_lo == entry->u.msi.address_lo &&
1489 e->u.msi.address_hi == entry->u.msi.address_hi &&
1490 e->u.msi.data == entry->u.msi.data) {
1491 memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
1492 return 0;
1494 break;
1495 default:
1496 break;
1499 return -ESRCH;
1500 #else
1501 return -ENOSYS;
1502 #endif
1505 int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1507 #ifdef KVM_CAP_IRQ_ROUTING
1508 struct kvm_irq_routing_entry e;
1510 e.gsi = gsi;
1511 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1512 e.flags = 0;
1513 e.u.irqchip.irqchip = irqchip;
1514 e.u.irqchip.pin = pin;
1515 return kvm_del_routing_entry(kvm, &e);
1516 #else
1517 return -ENOSYS;
1518 #endif
1521 int kvm_commit_irq_routes(kvm_context_t kvm)
1523 #ifdef KVM_CAP_IRQ_ROUTING
1524 kvm->irq_routes->flags = 0;
1525 return kvm_vm_ioctl(kvm_state, KVM_SET_GSI_ROUTING, kvm->irq_routes);
1526 #else
1527 return -ENOSYS;
1528 #endif
1531 int kvm_get_irq_route_gsi(kvm_context_t kvm)
1533 int i, bit;
1534 uint32_t *buf = kvm->used_gsi_bitmap;
1536 /* Return the lowest unused GSI in the bitmap */
1537 for (i = 0; i < kvm->max_gsi / 32; i++) {
1538 bit = ffs(~buf[i]);
1539 if (!bit)
1540 continue;
1542 return bit - 1 + i * 32;
1545 return -ENOSPC;
1548 #ifdef KVM_CAP_DEVICE_MSIX
1549 int kvm_assign_set_msix_nr(kvm_context_t kvm,
1550 struct kvm_assigned_msix_nr *msix_nr)
1552 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
1555 int kvm_assign_set_msix_entry(kvm_context_t kvm,
1556 struct kvm_assigned_msix_entry *entry)
1558 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
1560 #endif
1562 #if defined(KVM_CAP_IRQFD) && defined(CONFIG_eventfd)
1564 #include <sys/eventfd.h>
1566 static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
1568 struct kvm_irqfd data = {
1569 .fd = fd,
1570 .gsi = gsi,
1571 .flags = flags,
1574 return kvm_vm_ioctl(kvm_state, KVM_IRQFD, &data);
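/*
 * kvm_irqfd(): when KVM_CAP_IRQFD is available, create an eventfd and attach
 * it to the given GSI with the KVM_IRQFD ioctl, so the kernel injects that GSI
 * whenever the eventfd is signalled.  Returns the fd, or a negative errno.
 */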
1577 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1579 int r;
1580 int fd;
1582 if (!kvm_check_extension(kvm_state, KVM_CAP_IRQFD))
1583 return -ENOENT;
1585 fd = eventfd(0, 0);
1586 if (fd < 0)
1587 return -errno;
1589 r = _kvm_irqfd(kvm, fd, gsi, 0);
1590 if (r < 0) {
1591 close(fd);
1592 return -errno;
1595 return fd;
1598 #else /* KVM_CAP_IRQFD */
1600 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1602 return -ENOSYS;
1605 #endif /* KVM_CAP_IRQFD */
1606 static inline unsigned long kvm_get_thread_id(void)
1608 return syscall(SYS_gettid);
1611 static void qemu_cond_wait(pthread_cond_t *cond)
1613 CPUState *env = cpu_single_env;
1614 static const struct timespec ts = {
1615 .tv_sec = 0,
1616 .tv_nsec = 100000,
1619 pthread_cond_timedwait(cond, &qemu_mutex, &ts);
1620 cpu_single_env = env;
1623 static void sig_ipi_handler(int n)
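/*
 * on_vcpu(): run func(data) in the context of a given vcpu thread.  If we
 * already are that thread, call it directly; otherwise queue a work item on
 * the vcpu's list, kick the thread with SIG_IPI and wait on qemu_work_cond
 * until the item is marked done by flush_queued_work().
 */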
1627 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
1629 struct qemu_work_item wi;
1631 if (env == current_env) {
1632 func(data);
1633 return;
1636 wi.func = func;
1637 wi.data = data;
1638 if (!env->kvm_cpu_state.queued_work_first)
1639 env->kvm_cpu_state.queued_work_first = &wi;
1640 else
1641 env->kvm_cpu_state.queued_work_last->next = &wi;
1642 env->kvm_cpu_state.queued_work_last = &wi;
1643 wi.next = NULL;
1644 wi.done = false;
1646 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1647 while (!wi.done)
1648 qemu_cond_wait(&qemu_work_cond);
1651 static void inject_interrupt(void *data)
1653 cpu_interrupt(current_env, (long)data);
1656 void kvm_inject_interrupt(CPUState *env, int mask)
1658 on_vcpu(env, inject_interrupt, (void *)(long)mask);
1661 void kvm_update_interrupt_request(CPUState *env)
1663 int signal = 0;
1665 if (env) {
1666 if (!current_env || !current_env->created)
1667 signal = 1;
1669 * Testing for created here is really redundant
1671 if (current_env && current_env->created &&
1672 env != current_env && !env->kvm_cpu_state.signalled)
1673 signal = 1;
1675 if (signal) {
1676 env->kvm_cpu_state.signalled = 1;
1677 if (env->kvm_cpu_state.thread)
1678 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1683 static void kvm_do_load_registers(void *_env)
1685 CPUState *env = _env;
1687 kvm_arch_load_regs(env);
1690 void kvm_load_registers(CPUState *env)
1692 if (kvm_enabled() && qemu_system_ready)
1693 on_vcpu(env, kvm_do_load_registers, env);
1696 static void kvm_do_save_registers(void *_env)
1698 CPUState *env = _env;
1700 kvm_arch_save_regs(env);
1703 void kvm_save_registers(CPUState *env)
1705 if (kvm_enabled())
1706 on_vcpu(env, kvm_do_save_registers, env);
1709 static void kvm_do_load_mpstate(void *_env)
1711 CPUState *env = _env;
1713 kvm_arch_load_mpstate(env);
1716 void kvm_load_mpstate(CPUState *env)
1718 if (kvm_enabled() && qemu_system_ready)
1719 on_vcpu(env, kvm_do_load_mpstate, env);
1722 static void kvm_do_save_mpstate(void *_env)
1724 CPUState *env = _env;
1726 kvm_arch_save_mpstate(env);
1727 env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
1730 void kvm_save_mpstate(CPUState *env)
1732 if (kvm_enabled())
1733 on_vcpu(env, kvm_do_save_mpstate, env);
1736 int kvm_cpu_exec(CPUState *env)
1738 int r;
1740 r = kvm_run(env->kvm_cpu_state.vcpu_ctx, env);
1741 if (r < 0) {
1742 printf("kvm_run returned %d\n", r);
1743 vm_stop(0);
1746 return 0;
1749 static int is_cpu_stopped(CPUState *env)
1751 return !vm_running || env->stopped;
1754 static void flush_queued_work(CPUState *env)
1756 struct qemu_work_item *wi;
1758 if (!env->kvm_cpu_state.queued_work_first)
1759 return;
1761 while ((wi = env->kvm_cpu_state.queued_work_first)) {
1762 env->kvm_cpu_state.queued_work_first = wi->next;
1763 wi->func(wi->data);
1764 wi->done = true;
1766 env->kvm_cpu_state.queued_work_last = NULL;
1767 pthread_cond_broadcast(&qemu_work_cond);
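/*
 * kvm_main_loop_wait(): with qemu_mutex dropped, wait up to 'timeout'
 * milliseconds for a SIG_IPI via sigtimedwait(), then retake the mutex, flush
 * queued work for this vcpu and honour any pending stop request.
 */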
1770 static void kvm_main_loop_wait(CPUState *env, int timeout)
1772 struct timespec ts;
1773 int r, e;
1774 siginfo_t siginfo;
1775 sigset_t waitset;
1777 pthread_mutex_unlock(&qemu_mutex);
1779 ts.tv_sec = timeout / 1000;
1780 ts.tv_nsec = (timeout % 1000) * 1000000;
1781 sigemptyset(&waitset);
1782 sigaddset(&waitset, SIG_IPI);
1784 r = sigtimedwait(&waitset, &siginfo, &ts);
1785 e = errno;
1787 pthread_mutex_lock(&qemu_mutex);
1789 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
1790 printf("sigtimedwait: %s\n", strerror(e));
1791 exit(1);
1794 cpu_single_env = env;
1795 flush_queued_work(env);
1797 if (env->stop) {
1798 env->stop = 0;
1799 env->stopped = 1;
1800 pthread_cond_signal(&qemu_pause_cond);
1803 env->kvm_cpu_state.signalled = 0;
1806 static int all_threads_paused(void)
1808 CPUState *penv = first_cpu;
1810 while (penv) {
1811 if (penv->stop)
1812 return 0;
1813 penv = (CPUState *)penv->next_cpu;
1816 return 1;
1819 static void pause_all_threads(void)
1821 CPUState *penv = first_cpu;
1823 while (penv) {
1824 if (penv != cpu_single_env) {
1825 penv->stop = 1;
1826 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1827 } else {
1828 penv->stop = 0;
1829 penv->stopped = 1;
1830 cpu_exit(penv);
1832 penv = (CPUState *)penv->next_cpu;
1835 while (!all_threads_paused())
1836 qemu_cond_wait(&qemu_pause_cond);
1839 static void resume_all_threads(void)
1841 CPUState *penv = first_cpu;
1843 assert(!cpu_single_env);
1845 while (penv) {
1846 penv->stop = 0;
1847 penv->stopped = 0;
1848 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1849 penv = (CPUState *)penv->next_cpu;
1853 static void kvm_vm_state_change_handler(void *context, int running, int reason)
1855 if (running)
1856 resume_all_threads();
1857 else
1858 pause_all_threads();
1861 static void setup_kernel_sigmask(CPUState *env)
1863 sigset_t set;
1865 sigemptyset(&set);
1866 sigaddset(&set, SIGUSR2);
1867 sigaddset(&set, SIGIO);
1868 sigaddset(&set, SIGALRM);
1869 sigprocmask(SIG_BLOCK, &set, NULL);
1871 sigprocmask(SIG_BLOCK, NULL, &set);
1872 sigdelset(&set, SIG_IPI);
1874 kvm_set_signal_mask(env->kvm_cpu_state.vcpu_ctx, &set);
1877 static void qemu_kvm_system_reset(void)
1879 CPUState *penv = first_cpu;
1881 pause_all_threads();
1883 qemu_system_reset();
1885 while (penv) {
1886 kvm_arch_cpu_reset(penv);
1887 penv = (CPUState *)penv->next_cpu;
1890 resume_all_threads();
1893 static void process_irqchip_events(CPUState *env)
1895 kvm_arch_process_irqchip_events(env);
1896 if (kvm_arch_has_work(env))
1897 env->halted = 0;
1900 static int kvm_main_loop_cpu(CPUState *env)
1902 setup_kernel_sigmask(env);
1904 pthread_mutex_lock(&qemu_mutex);
1906 kvm_qemu_init_env(env);
1907 #ifdef TARGET_I386
1908 kvm_tpr_vcpu_start(env);
1909 #endif
1911 cpu_single_env = env;
1912 kvm_arch_load_regs(env);
1914 while (1) {
1915 int run_cpu = !is_cpu_stopped(env);
1916 if (run_cpu && !kvm_irqchip_in_kernel(kvm_context)) {
1917 process_irqchip_events(env);
1918 run_cpu = !env->halted;
1920 if (run_cpu) {
1921 kvm_main_loop_wait(env, 0);
1922 kvm_cpu_exec(env);
1923 } else {
1924 kvm_main_loop_wait(env, 1000);
1927 pthread_mutex_unlock(&qemu_mutex);
1928 return 0;
1931 static void *ap_main_loop(void *_env)
1933 CPUState *env = _env;
1934 sigset_t signals;
1935 struct ioperm_data *data = NULL;
1937 current_env = env;
1938 env->thread_id = kvm_get_thread_id();
1939 sigfillset(&signals);
1940 sigprocmask(SIG_BLOCK, &signals, NULL);
1941 env->kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(env, env->cpu_index);
1943 #ifdef USE_KVM_DEVICE_ASSIGNMENT
1944 /* do ioperm for io ports of assigned devices */
1945 LIST_FOREACH(data, &ioperm_head, entries)
1946 on_vcpu(env, kvm_arch_do_ioperm, data);
1947 #endif
1949 /* signal VCPU creation */
1950 pthread_mutex_lock(&qemu_mutex);
1951 current_env->created = 1;
1952 pthread_cond_signal(&qemu_vcpu_cond);
1954 /* and wait for machine initialization */
1955 while (!qemu_system_ready)
1956 qemu_cond_wait(&qemu_system_cond);
1957 pthread_mutex_unlock(&qemu_mutex);
1959 kvm_main_loop_cpu(env);
1960 return NULL;
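/*
 * kvm_init_vcpu(): spawn the per-vcpu thread running ap_main_loop() and wait
 * on qemu_vcpu_cond until that thread has created its in-kernel vcpu and set
 * env->created.
 */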
1963 void kvm_init_vcpu(CPUState *env)
1965 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
1967 while (env->created == 0)
1968 qemu_cond_wait(&qemu_vcpu_cond);
1971 int kvm_vcpu_inited(CPUState *env)
1973 return env->created;
1976 #ifdef TARGET_I386
1977 void kvm_hpet_disable_kpit(void)
1979 struct kvm_pit_state2 ps2;
1981 kvm_get_pit2(kvm_context, &ps2);
1982 ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
1983 kvm_set_pit2(kvm_context, &ps2);
1986 void kvm_hpet_enable_kpit(void)
1988 struct kvm_pit_state2 ps2;
1990 kvm_get_pit2(kvm_context, &ps2);
1991 ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY;
1992 kvm_set_pit2(kvm_context, &ps2);
1994 #endif
1996 int kvm_init_ap(void)
1998 #ifdef TARGET_I386
1999 kvm_tpr_opt_setup();
2000 #endif
2001 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
2003 signal(SIG_IPI, sig_ipi_handler);
2004 return 0;
2007 void qemu_kvm_notify_work(void)
2009 uint64_t value = 1;
2010 char buffer[8];
2011 size_t offset = 0;
2013 if (io_thread_fd == -1)
2014 return;
2016 memcpy(buffer, &value, sizeof(value));
2018 while (offset < 8) {
2019 ssize_t len;
2021 len = write(io_thread_fd, buffer + offset, 8 - offset);
2022 if (len == -1 && errno == EINTR)
2023 continue;
 2025 /* In case we have a pipe, there is no reason to insist on writing
2026 * 8 bytes
2028 if (len == -1 && errno == EAGAIN)
2029 break;
2031 if (len <= 0)
2032 break;
2034 offset += len;
2038 /* If we have signalfd, we mask out the signals we want to handle and then
2039 * use signalfd to listen for them. We rely on whatever the current signal
2040 * handler is to dispatch the signals when we receive them.
2043 static void sigfd_handler(void *opaque)
2045 int fd = (unsigned long)opaque;
2046 struct qemu_signalfd_siginfo info;
2047 struct sigaction action;
2048 ssize_t len;
2050 while (1) {
2051 do {
2052 len = read(fd, &info, sizeof(info));
2053 } while (len == -1 && errno == EINTR);
2055 if (len == -1 && errno == EAGAIN)
2056 break;
2058 if (len != sizeof(info)) {
2059 printf("read from sigfd returned %zd: %m\n", len);
2060 return;
2063 sigaction(info.ssi_signo, NULL, &action);
2064 if (action.sa_handler)
2065 action.sa_handler(info.ssi_signo);
2070 /* Used to break IO thread out of select */
2071 static void io_thread_wakeup(void *opaque)
2073 int fd = (unsigned long)opaque;
2074 char buffer[4096];
2076 /* Drain the pipe/(eventfd) */
2077 while (1) {
2078 ssize_t len;
2080 len = read(fd, buffer, sizeof(buffer));
2081 if (len == -1 && errno == EINTR)
2082 continue;
2084 if (len <= 0)
2085 break;
2089 int kvm_main_loop(void)
2091 int fds[2];
2092 sigset_t mask;
2093 int sigfd;
2095 io_thread = pthread_self();
2096 qemu_system_ready = 1;
2098 if (qemu_eventfd(fds) == -1) {
2099 fprintf(stderr, "failed to create eventfd\n");
2100 return -errno;
2103 fcntl(fds[0], F_SETFL, O_NONBLOCK);
2104 fcntl(fds[1], F_SETFL, O_NONBLOCK);
2106 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
2107 (void *)(unsigned long)fds[0]);
2109 io_thread_fd = fds[1];
2111 sigemptyset(&mask);
2112 sigaddset(&mask, SIGIO);
2113 sigaddset(&mask, SIGALRM);
2114 sigprocmask(SIG_BLOCK, &mask, NULL);
2116 sigfd = qemu_signalfd(&mask);
2117 if (sigfd == -1) {
2118 fprintf(stderr, "failed to create signalfd\n");
2119 return -errno;
2122 fcntl(sigfd, F_SETFL, O_NONBLOCK);
2124 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
2125 (void *)(unsigned long)sigfd);
2127 pthread_cond_broadcast(&qemu_system_cond);
2129 io_thread_sigfd = sigfd;
2130 cpu_single_env = NULL;
2132 while (1) {
2133 main_loop_wait(1000);
2134 if (qemu_shutdown_requested()) {
2135 if (qemu_no_shutdown()) {
2136 vm_stop(0);
2137 } else
2138 break;
2139 } else if (qemu_powerdown_requested())
2140 qemu_system_powerdown();
2141 else if (qemu_reset_requested())
2142 qemu_kvm_system_reset();
2143 else if (kvm_debug_cpu_requested) {
2144 gdb_set_stop_cpu(kvm_debug_cpu_requested);
2145 vm_stop(EXCP_DEBUG);
2146 kvm_debug_cpu_requested = NULL;
2150 pause_all_threads();
2151 pthread_mutex_unlock(&qemu_mutex);
2153 return 0;
2156 #ifdef TARGET_I386
2157 static int destroy_region_works = 0;
2158 #endif
2161 #if !defined(TARGET_I386)
2162 int kvm_arch_init_irq_routing(void)
2164 return 0;
2166 #endif
2168 static int kvm_create_context()
2170 int r;
2172 if (!kvm_irqchip) {
2173 kvm_disable_irqchip_creation(kvm_context);
2175 if (!kvm_pit) {
2176 kvm_disable_pit_creation(kvm_context);
2178 if (kvm_create(kvm_context, 0, NULL) < 0) {
2179 kvm_finalize(kvm_state);
2180 return -1;
2182 r = kvm_arch_qemu_create_context();
 2183 if (r < 0)
2184 kvm_finalize(kvm_state);
2185 if (kvm_pit && !kvm_pit_reinject) {
2186 if (kvm_reinject_control(kvm_context, 0)) {
2187 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
2188 return -1;
2191 #ifdef TARGET_I386
2192 destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
2193 #endif
2195 r = kvm_arch_init_irq_routing();
2196 if (r < 0) {
2197 return r;
2200 return 0;
2203 #ifdef TARGET_I386
2204 static int must_use_aliases_source(target_phys_addr_t addr)
2206 if (destroy_region_works)
2207 return false;
2208 if (addr == 0xa0000 || addr == 0xa8000)
2209 return true;
2210 return false;
2213 static int must_use_aliases_target(target_phys_addr_t addr)
2215 if (destroy_region_works)
2216 return false;
2217 if (addr >= 0xe0000000 && addr < 0x100000000ull)
2218 return true;
2219 return false;
2222 static struct mapping {
2223 target_phys_addr_t phys;
2224 ram_addr_t ram;
2225 ram_addr_t len;
2226 } mappings[50];
2227 static int nr_mappings;
2229 static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
2231 struct mapping *p;
2233 for (p = mappings; p < mappings + nr_mappings; ++p) {
2234 if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
2235 return p;
2238 return NULL;
2241 static struct mapping *find_mapping(target_phys_addr_t start_addr)
2243 struct mapping *p;
2245 for (p = mappings; p < mappings + nr_mappings; ++p) {
2246 if (p->phys <= start_addr && start_addr < p->phys + p->len) {
2247 return p;
2250 return NULL;
2253 static void drop_mapping(target_phys_addr_t start_addr)
2255 struct mapping *p = find_mapping(start_addr);
2257 if (p)
2258 *p = mappings[--nr_mappings];
2260 #endif
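/*
 * kvm_set_phys_mem(): track guest physical memory layout changes.  Non-RAM
 * ranges get their slots unregistered (on i386 the 0xa0000 VGA windows are
 * handled as aliases when memory-slot deletion is known to be broken); RAM
 * already covered by an existing slot is ignored, and new RAM is registered as
 * a user memory slot backed by qemu_get_ram_ptr(phys_offset).
 */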
2262 void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
2263 ram_addr_t phys_offset)
2265 int r = 0;
2266 unsigned long area_flags;
2267 #ifdef TARGET_I386
2268 struct mapping *p;
2269 #endif
2271 if (start_addr + size > phys_ram_size) {
2272 phys_ram_size = start_addr + size;
2275 phys_offset &= ~IO_MEM_ROM;
2276 area_flags = phys_offset & ~TARGET_PAGE_MASK;
2278 if (area_flags != IO_MEM_RAM) {
2279 #ifdef TARGET_I386
2280 if (must_use_aliases_source(start_addr)) {
2281 kvm_destroy_memory_alias(kvm_context, start_addr);
2282 return;
2284 if (must_use_aliases_target(start_addr))
2285 return;
2286 #endif
2287 while (size > 0) {
2288 p = find_mapping(start_addr);
2289 if (p) {
2290 kvm_unregister_memory_area(kvm_context, p->phys, p->len);
2291 drop_mapping(p->phys);
2293 start_addr += TARGET_PAGE_SIZE;
2294 if (size > TARGET_PAGE_SIZE) {
2295 size -= TARGET_PAGE_SIZE;
2296 } else {
2297 size = 0;
2300 return;
2303 r = kvm_is_containing_region(kvm_context, start_addr, size);
2304 if (r)
2305 return;
2307 if (area_flags >= TLB_MMIO)
2308 return;
2310 #ifdef TARGET_I386
2311 if (must_use_aliases_source(start_addr)) {
2312 p = find_ram_mapping(phys_offset);
2313 if (p) {
2314 kvm_create_memory_alias(kvm_context, start_addr, size,
2315 p->phys + (phys_offset - p->ram));
2317 return;
2319 #endif
2321 r = kvm_register_phys_mem(kvm_context, start_addr,
2322 qemu_get_ram_ptr(phys_offset),
2323 size, 0);
2324 if (r < 0) {
2325 printf("kvm_cpu_register_physical_memory: failed\n");
2326 exit(1);
2329 #ifdef TARGET_I386
2330 drop_mapping(start_addr);
2331 p = &mappings[nr_mappings++];
2332 p->phys = start_addr;
2333 p->ram = phys_offset;
2334 p->len = size;
2335 #endif
2337 return;
2340 int kvm_setup_guest_memory(void *area, unsigned long size)
2342 int ret = 0;
2344 #ifdef MADV_DONTFORK
2345 if (kvm_enabled() && !kvm_has_sync_mmu())
2346 ret = madvise(area, size, MADV_DONTFORK);
2347 #endif
2349 if (ret)
2350 perror ("madvise");
2352 return ret;
2355 int kvm_qemu_check_extension(int ext)
2357 return kvm_check_extension(kvm_state, ext);
2360 int kvm_qemu_init_env(CPUState *cenv)
2362 return kvm_arch_qemu_init_env(cenv);
2365 #ifdef KVM_CAP_SET_GUEST_DEBUG
2367 struct kvm_set_guest_debug_data {
2368 struct kvm_guest_debug dbg;
2369 int err;
2372 static void kvm_invoke_set_guest_debug(void *data)
2374 struct kvm_set_guest_debug_data *dbg_data = data;
2376 dbg_data->err = kvm_set_guest_debug(cpu_single_env->kvm_cpu_state.vcpu_ctx,
2377 &dbg_data->dbg);
2380 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
2382 struct kvm_set_guest_debug_data data;
2384 data.dbg.control = 0;
2385 if (env->singlestep_enabled)
2386 data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
2388 kvm_arch_update_guest_debug(env, &data.dbg);
2389 data.dbg.control |= reinject_trap;
2391 on_vcpu(env, kvm_invoke_set_guest_debug, &data);
2392 return data.err;
2395 #endif
2398 * dirty pages logging
2400 /* FIXME: use unsigned long pointer instead of unsigned char */
2401 unsigned char *kvm_dirty_bitmap = NULL;
2402 int kvm_physical_memory_set_dirty_tracking(int enable)
2404 int r = 0;
2406 if (!kvm_enabled())
2407 return 0;
2409 if (enable) {
2410 if (!kvm_dirty_bitmap) {
2411 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
2412 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
2413 if (kvm_dirty_bitmap == NULL) {
2414 perror("Failed to allocate dirty pages bitmap");
2415 r=-1;
2417 else {
2418 r = kvm_dirty_pages_log_enable_all(kvm_context);
2422 else {
2423 if (kvm_dirty_bitmap) {
2424 r = kvm_dirty_pages_log_reset(kvm_context);
2425 qemu_free(kvm_dirty_bitmap);
2426 kvm_dirty_bitmap = NULL;
2429 return r;
2432 /* get kvm's dirty pages bitmap and update qemu's */
2433 static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
2434 unsigned char *bitmap,
2435 unsigned long offset,
2436 unsigned long mem_size)
2438 unsigned int i, j, n=0;
2439 unsigned char c;
2440 unsigned long page_number, addr, addr1;
2441 ram_addr_t ram_addr;
2442 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
2445 * bitmap-traveling is faster than memory-traveling (for addr...)
2446 * especially when most of the memory is not dirty.
2448 for (i=0; i<len; i++) {
2449 c = bitmap[i];
2450 while (c>0) {
2451 j = ffsl(c) - 1;
2452 c &= ~(1u<<j);
2453 page_number = i * 8 + j;
2454 addr1 = page_number * TARGET_PAGE_SIZE;
2455 addr = offset + addr1;
2456 ram_addr = cpu_get_physical_page_desc(addr);
2457 cpu_physical_memory_set_dirty(ram_addr);
2458 n++;
2461 return 0;
2463 static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
2464 void *bitmap, void *opaque)
2466 return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
2470 * get kvm's dirty pages bitmap and update qemu's
2471 * we only care about physical ram, which resides in slots 0 and 3
2473 int kvm_update_dirty_pages_log(void)
2475 int r = 0;
2478 r = kvm_get_dirty_pages_range(kvm_context, 0, -1UL,
2479 NULL,
2480 kvm_get_dirty_bitmap_cb);
2481 return r;
2484 void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
2485 int log)
2487 if (log)
2488 kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
2489 else {
2490 #ifdef TARGET_I386
2491 if (must_use_aliases_target(start))
2492 return;
2493 #endif
2494 kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
2498 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
2500 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
2501 unsigned int brsize = BITMAP_SIZE(ram_size);
2502 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
2503 unsigned int extra_bytes = (extra_pages +7)/8;
2504 unsigned int hole_start = BITMAP_SIZE(0xa0000);
2505 unsigned int hole_end = BITMAP_SIZE(0xc0000);
2507 memset(bitmap, 0xFF, brsize + extra_bytes);
2508 memset(bitmap + hole_start, 0, hole_end - hole_start);
2509 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
2511 return 0;
2514 #ifdef KVM_CAP_IRQCHIP
2516 int kvm_set_irq(int irq, int level, int *status)
2518 return kvm_set_irq_level(kvm_context, irq, level, status);
2521 #endif
2523 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
2525 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
2528 void kvm_mutex_unlock(void)
2530 assert(!cpu_single_env);
2531 pthread_mutex_unlock(&qemu_mutex);
2534 void kvm_mutex_lock(void)
2536 pthread_mutex_lock(&qemu_mutex);
2537 cpu_single_env = NULL;
2540 #ifdef USE_KVM_DEVICE_ASSIGNMENT
2541 void kvm_add_ioperm_data(struct ioperm_data *data)
2543 LIST_INSERT_HEAD(&ioperm_head, data, entries);
2546 void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
2548 struct ioperm_data *data;
2550 data = LIST_FIRST(&ioperm_head);
2551 while (data) {
2552 struct ioperm_data *next = LIST_NEXT(data, entries);
2554 if (data->start_port == start_port && data->num == num) {
2555 LIST_REMOVE(data, entries);
2556 qemu_free(data);
2559 data = next;
2563 void kvm_ioperm(CPUState *env, void *data)
2565 if (kvm_enabled() && qemu_system_ready)
2566 on_vcpu(env, kvm_arch_do_ioperm, data);
2569 #endif
2571 int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
2573 #ifndef TARGET_IA64
2575 #ifdef TARGET_I386
2576 if (must_use_aliases_source(start_addr))
2577 return 0;
2578 #endif
2580 kvm_get_dirty_pages_range(kvm_context, start_addr, end_addr - start_addr,
2581 NULL, kvm_get_dirty_bitmap_cb);
2582 #endif
2583 return 0;
2586 int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
2588 #ifdef TARGET_I386
2589 if (must_use_aliases_source(phys_addr))
2590 return 0;
2591 #endif
2593 #ifndef TARGET_IA64
2594 kvm_qemu_log_memory(phys_addr, len, 1);
2595 #endif
2596 return 0;
2599 int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
2601 #ifdef TARGET_I386
2602 if (must_use_aliases_source(phys_addr))
2603 return 0;
2604 #endif
2606 #ifndef TARGET_IA64
2607 kvm_qemu_log_memory(phys_addr, len, 0);
2608 #endif
2609 return 0;
2612 int kvm_set_boot_cpu_id(uint32_t id)
2614 return kvm_set_boot_vcpu_id(kvm_context, id);
2617 #ifdef TARGET_I386
2618 #ifdef KVM_CAP_MCE
2619 struct kvm_x86_mce_data
2621 CPUState *env;
2622 struct kvm_x86_mce *mce;
2625 static void kvm_do_inject_x86_mce(void *_data)
2627 struct kvm_x86_mce_data *data = _data;
2628 int r;
2630 r = kvm_set_mce(data->env->kvm_cpu_state.vcpu_ctx, data->mce);
2631 if (r < 0)
2632 perror("kvm_set_mce FAILED");
2634 #endif
2636 void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
2637 uint64_t mcg_status, uint64_t addr, uint64_t misc)
2639 #ifdef KVM_CAP_MCE
2640 struct kvm_x86_mce mce = {
2641 .bank = bank,
2642 .status = status,
2643 .mcg_status = mcg_status,
2644 .addr = addr,
2645 .misc = misc,
2647 struct kvm_x86_mce_data data = {
2648 .env = cenv,
2649 .mce = &mce,
2652 on_vcpu(cenv, kvm_do_inject_x86_mce, &data);
2653 #endif
2655 #endif