kvm userspace: ksm support
[qemu-kvm/fedora.git] / qemu-kvm.c
blob 32dce4aab7ad3c63dc10703d51c6e245c4c6edd6
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
21 #include "qemu-kvm.h"
22 #include "libkvm.h"
24 #include <pthread.h>
25 #include <sys/utsname.h>
26 #include <sys/syscall.h>
27 #include <sys/mman.h>
28 #include <sys/ioctl.h>
29 #include <signal.h>
31 #define false 0
32 #define true 1
34 #define EXPECTED_KVM_API_VERSION 12
36 #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
37 #error libkvm: userspace and kernel version mismatch
38 #endif
40 int kvm_allowed = 1;
41 int kvm_irqchip = 1;
42 int kvm_pit = 1;
43 int kvm_pit_reinject = 1;
44 int kvm_nested = 0;
47 KVMState *kvm_state;
48 kvm_context_t kvm_context;
50 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
51 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
52 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
53 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
54 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
55 __thread CPUState *current_env;
57 static int qemu_system_ready;
59 #define SIG_IPI (SIGRTMIN+4)
61 pthread_t io_thread;
62 static int io_thread_fd = -1;
63 static int io_thread_sigfd = -1;
65 static CPUState *kvm_debug_cpu_requested;
67 static uint64_t phys_ram_size;
69 /* The list of ioperm_data */
70 static LIST_HEAD(, ioperm_data) ioperm_head;
72 //#define DEBUG_MEMREG
73 #ifdef DEBUG_MEMREG
74 #define DPRINTF(fmt, args...) \
75 do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0)
76 #else
77 #define DPRINTF(fmt, args...) do {} while (0)
78 #endif
80 #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
82 int kvm_abi = EXPECTED_KVM_API_VERSION;
83 int kvm_page_size;
85 #ifdef KVM_CAP_SET_GUEST_DEBUG
86 static int kvm_debug(void *opaque, void *data,
87 struct kvm_debug_exit_arch *arch_info)
89 int handle = kvm_arch_debug(arch_info);
90 CPUState *env = data;
92 if (handle) {
93 kvm_debug_cpu_requested = env;
94 env->stopped = 1;
96 return handle;
98 #endif
100 int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
102 cpu_physical_memory_rw(addr, data, len, 0);
103 return 0;
106 int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
108 cpu_physical_memory_rw(addr, data, len, 1);
109 return 0;
112 static int handle_unhandled(uint64_t reason)
114 fprintf(stderr, "kvm: unhandled exit %"PRIx64"\n", reason);
115 return -EINVAL;
119 static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
121 uint32_t *bitmap = kvm->used_gsi_bitmap;
123 if (gsi < kvm->max_gsi)
124 bitmap[gsi / 32] |= 1U << (gsi % 32);
125 else
126 DPRINTF("Invalid GSI %u\n", gsi);
129 static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
131 uint32_t *bitmap = kvm->used_gsi_bitmap;
133 if (gsi < kvm->max_gsi)
134 bitmap[gsi / 32] &= ~(1U << (gsi % 32));
135 else
136 DPRINTF("Invalid GSI %u\n", gsi);
139 struct slot_info {
140 unsigned long phys_addr;
141 unsigned long len;
142 unsigned long userspace_addr;
143 unsigned flags;
144 int logging_count;
147 struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
149 static void init_slots(void)
151 int i;
153 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
154 slots[i].len = 0;
157 static int get_free_slot(kvm_context_t kvm)
159 int i;
160 int tss_ext;
162 #if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
163 tss_ext = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
164 #else
165 tss_ext = 0;
166 #endif
169 /* on older kernels where the set tss ioctl is not supported we must save
170  * slot 0 to hold the extended memory, as the vmx will use the last 3
171  * pages of this slot. */
173 if (tss_ext > 0)
174 i = 0;
175 else
176 i = 1;
178 for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
179 if (!slots[i].len)
180 return i;
181 return -1;
184 static void register_slot(int slot, unsigned long phys_addr, unsigned long len,
185 unsigned long userspace_addr, unsigned flags)
187 slots[slot].phys_addr = phys_addr;
188 slots[slot].len = len;
189 slots[slot].userspace_addr = userspace_addr;
190 slots[slot].flags = flags;
193 static void free_slot(int slot)
195 slots[slot].len = 0;
196 slots[slot].logging_count = 0;
199 static int get_slot(unsigned long phys_addr)
201 int i;
203 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) {
204 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
205 (slots[i].phys_addr + slots[i].len-1) >= phys_addr)
206 return i;
208 return -1;
211 /* Returns -1 if this slot is not totally contained in any other,
212  * and the number of the containing slot otherwise */
213 static int get_container_slot(uint64_t phys_addr, unsigned long size)
215 int i;
217 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i)
218 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
219 (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
220 return i;
221 return -1;
224 int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size)
226 int slot = get_container_slot(phys_addr, size);
227 if (slot == -1)
228 return 0;
229 return 1;
233 /* dirty pages logging control */
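/* kvm_dirty_pages_log_change(): update the flag bits selected by @mask for
 * the slot containing @phys_addr and, only if the flags actually changed,
 * push the updated slot back to the kernel with KVM_SET_USER_MEMORY_REGION. */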
235 static int kvm_dirty_pages_log_change(kvm_context_t kvm,
236 unsigned long phys_addr,
237 unsigned flags,
238 unsigned mask)
240 int r = -1;
241 int slot = get_slot(phys_addr);
243 if (slot == -1) {
244 fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
245 return 1;
248 flags = (slots[slot].flags & ~mask) | flags;
249 if (flags == slots[slot].flags)
250 return 0;
251 slots[slot].flags = flags;
254 struct kvm_userspace_memory_region mem = {
255 .slot = slot,
256 .memory_size = slots[slot].len,
257 .guest_phys_addr = slots[slot].phys_addr,
258 .userspace_addr = slots[slot].userspace_addr,
259 .flags = slots[slot].flags,
263 DPRINTF("slot %d start %llx len %llx flags %x\n",
264 mem.slot,
265 mem.guest_phys_addr,
266 mem.memory_size,
267 mem.flags);
268 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &mem);
269 if (r < 0)
270 fprintf(stderr, "%s: %m\n", __FUNCTION__);
272 return r;
275 static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
276 int (*change)(kvm_context_t kvm,
277 uint64_t start,
278 uint64_t len))
280 int i, r;
282 for (i=r=0; i<KVM_MAX_NUM_MEM_REGIONS && r==0; i++) {
283 if (slots[i].len)
284 r = change(kvm, slots[i].phys_addr, slots[i].len);
286 return r;
289 int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm,
290 uint64_t phys_addr,
291 uint64_t len)
293 int slot = get_slot(phys_addr);
295 DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len);
296 if (slot == -1) {
297 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
298 return -EINVAL;
301 if (slots[slot].logging_count++)
302 return 0;
304 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
305 KVM_MEM_LOG_DIRTY_PAGES,
306 KVM_MEM_LOG_DIRTY_PAGES);
309 int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm,
310 uint64_t phys_addr,
311 uint64_t len)
313 int slot = get_slot(phys_addr);
315 if (slot == -1) {
316 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
317 return -EINVAL;
320 if (--slots[slot].logging_count)
321 return 0;
323 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
325 KVM_MEM_LOG_DIRTY_PAGES);
329 /* Enable dirty page logging for all memory regions */
331 int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
333 if (kvm->dirty_pages_log_all)
334 return 0;
335 kvm->dirty_pages_log_all = 1;
336 return kvm_dirty_pages_log_change_all(kvm,
337 kvm_dirty_pages_log_enable_slot);
341 /* Enable dirty page logging only for memory regions that were created with
342  * dirty logging enabled (disable for all other memory regions). */
344 int kvm_dirty_pages_log_reset(kvm_context_t kvm)
346 if (!kvm->dirty_pages_log_all)
347 return 0;
348 kvm->dirty_pages_log_all = 0;
349 return kvm_dirty_pages_log_change_all(kvm,
350 kvm_dirty_pages_log_disable_slot);
354 static int kvm_create_context(void);
356 int kvm_init(int smp_cpus)
358 int fd;
359 int r, gsi_count;
362 fd = open("/dev/kvm", O_RDWR);
363 if (fd == -1) {
364 perror("open /dev/kvm");
365 return -1;
367 r = ioctl(fd, KVM_GET_API_VERSION, 0);
368 if (r == -1) {
369 fprintf(stderr, "kvm kernel version too old: "
370 "KVM_GET_API_VERSION ioctl not supported\n");
371 goto out_close;
373 if (r < EXPECTED_KVM_API_VERSION) {
374 fprintf(stderr, "kvm kernel version too old: "
375 "We expect API version %d or newer, but got "
376 "version %d\n",
377 EXPECTED_KVM_API_VERSION, r);
378 goto out_close;
380 if (r > EXPECTED_KVM_API_VERSION) {
381 fprintf(stderr, "kvm userspace version too old\n");
382 goto out_close;
384 kvm_abi = r;
385 kvm_page_size = getpagesize();
386 kvm_state = qemu_mallocz(sizeof(*kvm_state));
387 kvm_context = &kvm_state->kvm_context;
389 kvm_state->fd = fd;
390 kvm_state->vmfd = -1;
391 kvm_context->opaque = cpu_single_env;
392 kvm_context->dirty_pages_log_all = 0;
393 kvm_context->no_irqchip_creation = 0;
394 kvm_context->no_pit_creation = 0;
396 #ifdef KVM_CAP_SET_GUEST_DEBUG
397 TAILQ_INIT(&kvm_state->kvm_sw_breakpoints);
398 #endif
400 gsi_count = kvm_get_gsi_count(kvm_context);
401 if (gsi_count > 0) {
402 int gsi_bits, i;
404 /* Round up so we can search ints using ffs */
405 gsi_bits = ALIGN(gsi_count, 32);
406 kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
407 kvm_context->max_gsi = gsi_bits;
409 /* Mark any over-allocated bits as already in use */
410 for (i = gsi_count; i < gsi_bits; i++)
411 set_gsi(kvm_context, i);
414 pthread_mutex_lock(&qemu_mutex);
415 return kvm_create_context();
417 out_close:
418 close(fd);
419 return -1;
422 static void kvm_finalize(KVMState *s)
424 /* FIXME
425 if (kvm->vcpu_fd[0] != -1)
426 close(kvm->vcpu_fd[0]);
427 if (kvm->vm_fd != -1)
428 close(kvm->vm_fd); */
430 close(s->fd);
431 free(s);
434 void kvm_disable_irqchip_creation(kvm_context_t kvm)
436 kvm->no_irqchip_creation = 1;
439 void kvm_disable_pit_creation(kvm_context_t kvm)
441 kvm->no_pit_creation = 1;
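/* kvm_create_vcpu(): ask the kernel for a new vcpu with KVM_CREATE_VCPU and
 * mmap its shared kvm_run area; returns a freshly allocated vcpu context,
 * or NULL on failure. */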
444 kvm_vcpu_context_t kvm_create_vcpu(CPUState *env, int id)
446 long mmap_size;
447 int r;
448 kvm_vcpu_context_t vcpu_ctx = qemu_malloc(sizeof(struct kvm_vcpu_context));
449 kvm_context_t kvm = kvm_context;
451 vcpu_ctx->kvm = kvm;
452 vcpu_ctx->id = id;
454 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
455 if (r < 0) {
456 fprintf(stderr, "kvm_create_vcpu: %m\n");
457 goto err;
459 vcpu_ctx->fd = r;
461 env->kvm_fd = r;
462 env->kvm_state = kvm_state;
464 mmap_size = kvm_ioctl(kvm_state, KVM_GET_VCPU_MMAP_SIZE, 0);
465 if (mmap_size < 0) {
466 fprintf(stderr, "get vcpu mmap size: %m\n");
467 goto err_fd;
469 vcpu_ctx->run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED,
470 vcpu_ctx->fd, 0);
471 if (vcpu_ctx->run == MAP_FAILED) {
472 fprintf(stderr, "mmap vcpu area: %m\n");
473 goto err_fd;
475 return vcpu_ctx;
476 err_fd:
477 close(vcpu_ctx->fd);
478 err:
479 free(vcpu_ctx);
480 return NULL;
483 static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
485 #ifdef KVM_CAP_SET_BOOT_CPU_ID
486 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
487 if (r > 0)
488 return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id);
489 return -ENOSYS;
490 #else
491 return -ENOSYS;
492 #endif
495 int kvm_create_vm(kvm_context_t kvm)
497 int fd;
498 #ifdef KVM_CAP_IRQ_ROUTING
499 kvm->irq_routes = qemu_mallocz(sizeof(*kvm->irq_routes));
500 kvm->nr_allocated_irq_routes = 0;
501 #endif
503 fd = kvm_ioctl(kvm_state, KVM_CREATE_VM, 0);
504 if (fd < 0) {
505 fprintf(stderr, "kvm_create_vm: %m\n");
506 return -1;
508 kvm_state->vmfd = fd;
509 return 0;
512 static int kvm_create_default_phys_mem(kvm_context_t kvm,
513 unsigned long phys_mem_bytes,
514 void **vm_mem)
516 #ifdef KVM_CAP_USER_MEMORY
517 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
518 if (r > 0)
519 return 0;
520 fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
521 #else
522 #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
523 #endif
524 return -1;
527 void kvm_create_irqchip(kvm_context_t kvm)
529 int r;
531 kvm->irqchip_in_kernel = 0;
532 #ifdef KVM_CAP_IRQCHIP
533 if (!kvm->no_irqchip_creation) {
534 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
535 if (r > 0) { /* kernel irqchip supported */
536 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_IRQCHIP);
537 if (r >= 0) {
538 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
539 #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
540 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
541 KVM_CAP_IRQ_INJECT_STATUS);
542 if (r > 0)
543 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
544 #endif
545 kvm->irqchip_in_kernel = 1;
547 else
548 fprintf(stderr, "Create kernel PIC irqchip failed\n");
551 #endif
554 int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
556 int r;
558 r = kvm_create_vm(kvm);
559 if (r < 0)
560 return r;
561 r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
562 if (r < 0)
563 return r;
564 init_slots();
565 r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
566 if (r < 0)
567 return r;
568 kvm_create_irqchip(kvm);
570 return 0;
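/* kvm_register_phys_mem(): bind a host userspace range to guest physical
 * memory by filling a free slot and issuing KVM_SET_USER_MEMORY_REGION;
 * @log enables dirty page logging for the new slot. */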
574 int kvm_register_phys_mem(kvm_context_t kvm,
575 unsigned long phys_start, void *userspace_addr,
576 unsigned long len, int log)
579 struct kvm_userspace_memory_region memory = {
580 .memory_size = len,
581 .guest_phys_addr = phys_start,
582 .userspace_addr = (unsigned long)(intptr_t)userspace_addr,
583 .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
585 int r;
587 memory.slot = get_free_slot(kvm);
588 DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n",
589 memory.guest_phys_addr, memory.memory_size,
590 memory.userspace_addr, memory.slot, memory.flags);
591 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
592 if (r < 0) {
593 fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(-r));
594 return -1;
596 register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
597 memory.userspace_addr, memory.flags);
598 return 0;
602 /* destroy/free a whole slot.
603  * phys_start, len and slot are the params passed to kvm_create_phys_mem() */
605 void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
606 unsigned long len)
608 int slot;
609 int r;
610 struct kvm_userspace_memory_region memory = {
611 .memory_size = 0,
612 .guest_phys_addr = phys_start,
613 .userspace_addr = 0,
614 .flags = 0,
617 slot = get_slot(phys_start);
619 if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
620 fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
621 __FUNCTION__, slot);
622 return;
624 if (phys_start != slots[slot].phys_addr) {
625 fprintf(stderr,
626 "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
627 __FUNCTION__, phys_start, slots[slot].phys_addr);
628 phys_start = slots[slot].phys_addr;
631 memory.slot = slot;
632 DPRINTF("slot %d start %llx len %llx flags %x\n",
633 memory.slot,
634 memory.guest_phys_addr,
635 memory.memory_size,
636 memory.flags);
637 r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
638 if (r < 0) {
639 fprintf(stderr, "destroy_userspace_phys_mem: %s\n",
640 strerror(-r));
641 return;
644 free_slot(memory.slot);
647 void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size)
650 int slot = get_container_slot(phys_addr, size);
652 if (slot != -1) {
653 DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size);
654 kvm_destroy_phys_mem(kvm, phys_addr, size);
655 return;
659 static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
661 int r;
662 struct kvm_dirty_log log = {
663 .slot = slot,
666 log.dirty_bitmap = buf;
668 r = kvm_vm_ioctl(kvm_state, ioctl_num, &log);
669 if (r < 0)
670 return r;
671 return 0;
674 int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
676 int slot;
678 slot = get_slot(phys_addr);
679 return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
682 int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
683 unsigned long len, void *opaque,
684 int (*cb)(unsigned long start, unsigned long len,
685 void*bitmap, void *opaque))
687 int i;
688 int r;
689 unsigned long end_addr = phys_addr + len;
690 void *buf;
692 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
693 if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr)
694 && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) {
695 buf = qemu_malloc((slots[i].len / 4096 + 7) / 8 + 2);
696 r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
697 if (r) {
698 qemu_free(buf);
699 return r;
701 r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
702 qemu_free(buf);
703 if (r)
704 return r;
707 return 0;
710 #ifdef KVM_CAP_IRQCHIP
712 int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
714 struct kvm_irq_level event;
715 int r;
717 if (!kvm->irqchip_in_kernel)
718 return 0;
719 event.level = level;
720 event.irq = irq;
721 r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event);
722 if (r < 0)
723 perror("kvm_set_irq_level");
725 if (status) {
726 #ifdef KVM_CAP_IRQ_INJECT_STATUS
727 *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ?
728 1 : event.status;
729 #else
730 *status = 1;
731 #endif
734 return 1;
737 int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
739 int r;
741 if (!kvm->irqchip_in_kernel)
742 return 0;
743 r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
744 if (r < 0) {
745 perror("kvm_get_irqchip");
747 return r;
750 int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
752 int r;
754 if (!kvm->irqchip_in_kernel)
755 return 0;
756 r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
757 if (r < 0) {
758 perror("kvm_set_irqchip");
760 return r;
763 #endif
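/* handle_io(): process a KVM_EXIT_IO exit.  For each repetition of a string
 * I/O instruction, forward the port access to the emulated device model via
 * cpu_inb/w/l or cpu_outb/w/l and advance the data pointer by the access
 * size. */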
765 static int handle_io(kvm_vcpu_context_t vcpu)
767 struct kvm_run *run = vcpu->run;
768 kvm_context_t kvm = vcpu->kvm;
769 uint16_t addr = run->io.port;
770 int i;
771 void *p = (void *)run + run->io.data_offset;
773 for (i = 0; i < run->io.count; ++i) {
774 switch (run->io.direction) {
775 case KVM_EXIT_IO_IN:
776 switch (run->io.size) {
777 case 1:
778 *(uint8_t *)p = cpu_inb(kvm->opaque, addr);
779 break;
780 case 2:
781 *(uint16_t *)p = cpu_inw(kvm->opaque, addr);
782 break;
783 case 4:
784 *(uint32_t *)p = cpu_inl(kvm->opaque, addr);
785 break;
786 default:
787 fprintf(stderr, "bad I/O size %d\n", run->io.size);
788 return -EMSGSIZE;
790 break;
791 case KVM_EXIT_IO_OUT:
792 switch (run->io.size) {
793 case 1:
794 cpu_outb(kvm->opaque, addr, *(uint8_t *)p);
795 break;
796 case 2:
797 cpu_outw(kvm->opaque, addr, *(uint16_t *)p);
798 break;
799 case 4:
800 cpu_outl(kvm->opaque, addr, *(uint32_t *)p);
801 break;
802 default:
803 fprintf(stderr, "bad I/O size %d\n", run->io.size);
804 return -EMSGSIZE;
806 break;
807 default:
808 fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
809 return -EPROTO;
812 p += run->io.size;
815 return 0;
818 int handle_debug(kvm_vcpu_context_t vcpu, void *env)
820 #ifdef KVM_CAP_SET_GUEST_DEBUG
821 struct kvm_run *run = vcpu->run;
822 kvm_context_t kvm = vcpu->kvm;
824 return kvm_debug(kvm->opaque, env, &run->debug.arch);
825 #else
826 return 0;
827 #endif
830 int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
832 return ioctl(vcpu->fd, KVM_GET_REGS, regs);
835 int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
837 return ioctl(vcpu->fd, KVM_SET_REGS, regs);
840 int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
842 return ioctl(vcpu->fd, KVM_GET_FPU, fpu);
845 int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
847 return ioctl(vcpu->fd, KVM_SET_FPU, fpu);
850 int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
852 return ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
855 int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
857 return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
860 #ifdef KVM_CAP_MP_STATE
861 int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
863 int r;
865 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
866 if (r > 0)
867 return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state);
868 return -ENOSYS;
871 int kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
873 int r;
875 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
876 if (r > 0)
877 return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
878 return -ENOSYS;
880 #endif
882 static int handle_mmio(kvm_vcpu_context_t vcpu)
884 unsigned long addr = vcpu->run->mmio.phys_addr;
885 kvm_context_t kvm = vcpu->kvm;
886 struct kvm_run *kvm_run = vcpu->run;
887 void *data = kvm_run->mmio.data;
889 /* hack: Red Hat 7.1 generates these weird accesses. */
890 if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
891 return 0;
893 if (kvm_run->mmio.is_write)
894 return kvm_mmio_write(kvm->opaque, addr, data,
895 kvm_run->mmio.len);
896 else
897 return kvm_mmio_read(kvm->opaque, addr, data,
898 kvm_run->mmio.len);
901 int handle_io_window(kvm_context_t kvm)
903 return 1;
906 int handle_halt(kvm_vcpu_context_t vcpu)
908 return kvm_arch_halt(vcpu->kvm->opaque, vcpu);
911 int handle_shutdown(kvm_context_t kvm, CPUState *env)
913 /* stop the current vcpu from going back to guest mode */
914 env->stopped = 1;
916 qemu_system_reset_request();
917 return 1;
920 static inline void push_nmi(kvm_context_t kvm)
922 #ifdef KVM_CAP_USER_NMI
923 kvm_arch_push_nmi(kvm->opaque);
924 #endif /* KVM_CAP_USER_NMI */
927 void post_kvm_run(kvm_context_t kvm, CPUState *env)
929 pthread_mutex_lock(&qemu_mutex);
930 kvm_arch_post_kvm_run(kvm->opaque, env);
933 int pre_kvm_run(kvm_context_t kvm, CPUState *env)
935 kvm_arch_pre_kvm_run(kvm->opaque, env);
937 pthread_mutex_unlock(&qemu_mutex);
938 return 0;
941 int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu)
943 return vcpu->run->if_flag;
946 int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu)
948 return vcpu->run->ready_for_interrupt_injection;
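/* kvm_run(): main vcpu execution loop.  Push any pending NMI, request an
 * interrupt window when the irqchip lives in userspace, enter the guest with
 * KVM_RUN, flush the coalesced MMIO ring, then dispatch on run->exit_reason;
 * a handler returning 0 makes the loop re-enter the guest. */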
951 int kvm_run(kvm_vcpu_context_t vcpu, void *env)
953 int r;
954 int fd = vcpu->fd;
955 struct kvm_run *run = vcpu->run;
956 kvm_context_t kvm = vcpu->kvm;
958 again:
959 push_nmi(kvm);
960 #if !defined(__s390__)
961 if (!kvm->irqchip_in_kernel)
962 run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
963 #endif
964 r = pre_kvm_run(kvm, env);
965 if (r)
966 return r;
967 r = ioctl(fd, KVM_RUN, 0);
969 if (r == -1 && errno != EINTR && errno != EAGAIN) {
970 r = -errno;
971 post_kvm_run(kvm, env);
972 fprintf(stderr, "kvm_run: %s\n", strerror(-r));
973 return r;
976 post_kvm_run(kvm, env);
978 #if defined(KVM_CAP_COALESCED_MMIO)
979 if (kvm->coalesced_mmio) {
980 struct kvm_coalesced_mmio_ring *ring = (void *)run +
981 kvm->coalesced_mmio * PAGE_SIZE;
982 while (ring->first != ring->last) {
983 kvm_mmio_write(kvm->opaque,
984 ring->coalesced_mmio[ring->first].phys_addr,
985 &ring->coalesced_mmio[ring->first].data[0],
986 ring->coalesced_mmio[ring->first].len);
987 smp_wmb();
988 ring->first = (ring->first + 1) %
989 KVM_COALESCED_MMIO_MAX;
992 #endif
994 #if !defined(__s390__)
995 if (r == -1) {
996 r = handle_io_window(kvm);
997 goto more;
999 #endif
1000 if (1) {
1001 switch (run->exit_reason) {
1002 case KVM_EXIT_UNKNOWN:
1003 r = handle_unhandled(run->hw.hardware_exit_reason);
1004 break;
1005 case KVM_EXIT_FAIL_ENTRY:
1006 r = handle_unhandled(run->fail_entry.hardware_entry_failure_reason);
1007 break;
1008 case KVM_EXIT_EXCEPTION:
1009 fprintf(stderr, "exception %d (%x)\n",
1010 run->ex.exception,
1011 run->ex.error_code);
1012 kvm_show_regs(vcpu);
1013 kvm_show_code(vcpu);
1014 abort();
1015 break;
1016 case KVM_EXIT_IO:
1017 r = handle_io(vcpu);
1018 break;
1019 case KVM_EXIT_DEBUG:
1020 r = handle_debug(vcpu, env);
1021 break;
1022 case KVM_EXIT_MMIO:
1023 r = handle_mmio(vcpu);
1024 break;
1025 case KVM_EXIT_HLT:
1026 r = handle_halt(vcpu);
1027 break;
1028 case KVM_EXIT_IRQ_WINDOW_OPEN:
1029 break;
1030 case KVM_EXIT_SHUTDOWN:
1031 r = handle_shutdown(kvm, env);
1032 break;
1033 #if defined(__s390__)
1034 case KVM_EXIT_S390_SIEIC:
1035 r = kvm_s390_handle_intercept(kvm, vcpu,
1036 run);
1037 break;
1038 case KVM_EXIT_S390_RESET:
1039 r = kvm_s390_handle_reset(kvm, vcpu, run);
1040 break;
1041 #endif
1042 default:
1043 if (kvm_arch_run(vcpu)) {
1044 fprintf(stderr, "unhandled vm exit: 0x%x\n",
1045 run->exit_reason);
1046 kvm_show_regs(vcpu);
1047 abort();
1049 break;
1052 more:
1053 if (!r)
1054 goto again;
1055 return r;
1058 int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq)
1060 struct kvm_interrupt intr;
1062 intr.irq = irq;
1063 return ioctl(vcpu->fd, KVM_INTERRUPT, &intr);
1066 #ifdef KVM_CAP_SET_GUEST_DEBUG
1067 int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg)
1069 return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg);
1071 #endif
1073 int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset)
1075 struct kvm_signal_mask *sigmask;
1076 int r;
1078 if (!sigset) {
1079 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL);
1080 if (r == -1)
1081 r = -errno;
1082 return r;
1084 sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));
1086 sigmask->len = 8;
1087 memcpy(sigmask->sigset, sigset, sizeof(*sigset));
1088 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask);
1089 if (r == -1)
1090 r = -errno;
1091 free(sigmask);
1092 return r;
1095 int kvm_irqchip_in_kernel(kvm_context_t kvm)
1097 return kvm->irqchip_in_kernel;
1100 int kvm_pit_in_kernel(kvm_context_t kvm)
1102 return kvm->pit_in_kernel;
1105 int kvm_has_sync_mmu(void)
1107 int r = 0;
1108 #ifdef KVM_CAP_SYNC_MMU
1109 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
1110 #endif
1111 return r;
1114 int kvm_inject_nmi(kvm_vcpu_context_t vcpu)
1116 #ifdef KVM_CAP_USER_NMI
1117 return ioctl(vcpu->fd, KVM_NMI);
1118 #else
1119 return -ENOSYS;
1120 #endif
1123 int kvm_init_coalesced_mmio(kvm_context_t kvm)
1125 int r = 0;
1126 kvm->coalesced_mmio = 0;
1127 #ifdef KVM_CAP_COALESCED_MMIO
1128 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
1129 if (r > 0) {
1130 kvm->coalesced_mmio = r;
1131 return 0;
1133 #endif
1134 return r;
1137 int kvm_coalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
1139 #ifdef KVM_CAP_COALESCED_MMIO
1140 kvm_context_t kvm = kvm_context;
1141 struct kvm_coalesced_mmio_zone zone;
1142 int r;
1144 if (kvm->coalesced_mmio) {
1146 zone.addr = addr;
1147 zone.size = size;
1149 r = kvm_vm_ioctl(kvm_state, KVM_REGISTER_COALESCED_MMIO, &zone);
1150 if (r < 0) {
1151 perror("kvm_register_coalesced_mmio_zone");
1152 return r;
1154 return 0;
1156 #endif
1157 return -ENOSYS;
1160 int kvm_uncoalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
1162 #ifdef KVM_CAP_COALESCED_MMIO
1163 kvm_context_t kvm = kvm_context;
1164 struct kvm_coalesced_mmio_zone zone;
1165 int r;
1167 if (kvm->coalesced_mmio) {
1169 zone.addr = addr;
1170 zone.size = size;
1172 r = kvm_vm_ioctl(kvm_state, KVM_UNREGISTER_COALESCED_MMIO, &zone);
1173 if (r < 0) {
1174 perror("kvm_unregister_coalesced_mmio_zone");
1175 return r;
1177 DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size);
1178 return 0;
1180 #endif
1181 return -ENOSYS;
1184 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
1185 int kvm_assign_pci_device(kvm_context_t kvm,
1186 struct kvm_assigned_pci_dev *assigned_dev)
1188 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
1191 static int kvm_old_assign_irq(kvm_context_t kvm,
1192 struct kvm_assigned_irq *assigned_irq)
1194 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
1197 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
1198 int kvm_assign_irq(kvm_context_t kvm,
1199 struct kvm_assigned_irq *assigned_irq)
1201 int ret;
1203 ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
1204 if (ret > 0) {
1205 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq);
1208 return kvm_old_assign_irq(kvm, assigned_irq);
1211 int kvm_deassign_irq(kvm_context_t kvm,
1212 struct kvm_assigned_irq *assigned_irq)
1214 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
1216 #else
1217 int kvm_assign_irq(kvm_context_t kvm,
1218 struct kvm_assigned_irq *assigned_irq)
1220 return kvm_old_assign_irq(kvm, assigned_irq);
1222 #endif
1223 #endif
1225 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1226 int kvm_deassign_pci_device(kvm_context_t kvm,
1227 struct kvm_assigned_pci_dev *assigned_dev)
1229 return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
1231 #endif
1233 int kvm_destroy_memory_region_works(kvm_context_t kvm)
1235 int ret = 0;
1237 #ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
1238 ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
1239 KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
1240 if (ret <= 0)
1241 ret = 0;
1242 #endif
1243 return ret;
1246 int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
1248 #ifdef KVM_CAP_REINJECT_CONTROL
1249 int r;
1250 struct kvm_reinject_control control;
1252 control.pit_reinject = pit_reinject;
1254 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
1255 if (r > 0) {
1256 return kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control);
1258 #endif
1259 return -ENOSYS;
1262 int kvm_has_gsi_routing(kvm_context_t kvm)
1264 int r = 0;
1266 #ifdef KVM_CAP_IRQ_ROUTING
1267 r = kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
1268 #endif
1269 return r;
1272 int kvm_get_gsi_count(kvm_context_t kvm)
1274 #ifdef KVM_CAP_IRQ_ROUTING
1275 return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
1276 #else
1277 return -EINVAL;
1278 #endif
1281 int kvm_clear_gsi_routes(kvm_context_t kvm)
1283 #ifdef KVM_CAP_IRQ_ROUTING
1284 kvm->irq_routes->nr = 0;
1285 return 0;
1286 #else
1287 return -EINVAL;
1288 #endif
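/* kvm_add_routing_entry(): append a GSI routing entry, growing the
 * irq_routes table by doubling (minimum 64 entries) when it is full, and
 * mark the GSI as used in the allocation bitmap. */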
1291 int kvm_add_routing_entry(kvm_context_t kvm,
1292 struct kvm_irq_routing_entry* entry)
1294 #ifdef KVM_CAP_IRQ_ROUTING
1295 struct kvm_irq_routing *z;
1296 struct kvm_irq_routing_entry *new;
1297 int n, size;
1299 if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
1300 n = kvm->nr_allocated_irq_routes * 2;
1301 if (n < 64)
1302 n = 64;
1303 size = sizeof(struct kvm_irq_routing);
1304 size += n * sizeof(*new);
1305 z = realloc(kvm->irq_routes, size);
1306 if (!z)
1307 return -ENOMEM;
1308 kvm->nr_allocated_irq_routes = n;
1309 kvm->irq_routes = z;
1311 n = kvm->irq_routes->nr++;
1312 new = &kvm->irq_routes->entries[n];
1313 memset(new, 0, sizeof(*new));
1314 new->gsi = entry->gsi;
1315 new->type = entry->type;
1316 new->flags = entry->flags;
1317 new->u = entry->u;
1319 set_gsi(kvm, entry->gsi);
1321 return 0;
1322 #else
1323 return -ENOSYS;
1324 #endif
1327 int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1329 #ifdef KVM_CAP_IRQ_ROUTING
1330 struct kvm_irq_routing_entry e;
1332 e.gsi = gsi;
1333 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1334 e.flags = 0;
1335 e.u.irqchip.irqchip = irqchip;
1336 e.u.irqchip.pin = pin;
1337 return kvm_add_routing_entry(kvm, &e);
1338 #else
1339 return -ENOSYS;
1340 #endif
1343 int kvm_del_routing_entry(kvm_context_t kvm,
1344 struct kvm_irq_routing_entry* entry)
1346 #ifdef KVM_CAP_IRQ_ROUTING
1347 struct kvm_irq_routing_entry *e, *p;
1348 int i, gsi, found = 0;
1350 gsi = entry->gsi;
1352 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1353 e = &kvm->irq_routes->entries[i];
1354 if (e->type == entry->type
1355 && e->gsi == gsi) {
1356 switch (e->type)
1358 case KVM_IRQ_ROUTING_IRQCHIP: {
1359 if (e->u.irqchip.irqchip ==
1360 entry->u.irqchip.irqchip
1361 && e->u.irqchip.pin ==
1362 entry->u.irqchip.pin) {
1363 p = &kvm->irq_routes->
1364 entries[--kvm->irq_routes->nr];
1365 *e = *p;
1366 found = 1;
1368 break;
1370 case KVM_IRQ_ROUTING_MSI: {
1371 if (e->u.msi.address_lo ==
1372 entry->u.msi.address_lo
1373 && e->u.msi.address_hi ==
1374 entry->u.msi.address_hi
1375 && e->u.msi.data == entry->u.msi.data) {
1376 p = &kvm->irq_routes->
1377 entries[--kvm->irq_routes->nr];
1378 *e = *p;
1379 found = 1;
1381 break;
1383 default:
1384 break;
1386 if (found) {
1387 /* If there are no other users of this GSI
1388 * mark it available in the bitmap */
1389 for (i = 0; i < kvm->irq_routes->nr; i++) {
1390 e = &kvm->irq_routes->entries[i];
1391 if (e->gsi == gsi)
1392 break;
1394 if (i == kvm->irq_routes->nr)
1395 clear_gsi(kvm, gsi);
1397 return 0;
1401 return -ESRCH;
1402 #else
1403 return -ENOSYS;
1404 #endif
1407 int kvm_update_routing_entry(kvm_context_t kvm,
1408 struct kvm_irq_routing_entry* entry,
1409 struct kvm_irq_routing_entry* newentry)
1411 #ifdef KVM_CAP_IRQ_ROUTING
1412 struct kvm_irq_routing_entry *e;
1413 int i;
1415 if (entry->gsi != newentry->gsi ||
1416 entry->type != newentry->type) {
1417 return -EINVAL;
1420 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1421 e = &kvm->irq_routes->entries[i];
1422 if (e->type != entry->type || e->gsi != entry->gsi) {
1423 continue;
1425 switch (e->type) {
1426 case KVM_IRQ_ROUTING_IRQCHIP:
1427 if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
1428 e->u.irqchip.pin == entry->u.irqchip.pin) {
1429 memcpy(&e->u.irqchip, &newentry->u.irqchip, sizeof e->u.irqchip);
1430 return 0;
1432 break;
1433 case KVM_IRQ_ROUTING_MSI:
1434 if (e->u.msi.address_lo == entry->u.msi.address_lo &&
1435 e->u.msi.address_hi == entry->u.msi.address_hi &&
1436 e->u.msi.data == entry->u.msi.data) {
1437 memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
1438 return 0;
1440 break;
1441 default:
1442 break;
1445 return -ESRCH;
1446 #else
1447 return -ENOSYS;
1448 #endif
1451 int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1453 #ifdef KVM_CAP_IRQ_ROUTING
1454 struct kvm_irq_routing_entry e;
1456 e.gsi = gsi;
1457 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1458 e.flags = 0;
1459 e.u.irqchip.irqchip = irqchip;
1460 e.u.irqchip.pin = pin;
1461 return kvm_del_routing_entry(kvm, &e);
1462 #else
1463 return -ENOSYS;
1464 #endif
1467 int kvm_commit_irq_routes(kvm_context_t kvm)
1469 #ifdef KVM_CAP_IRQ_ROUTING
1470 kvm->irq_routes->flags = 0;
1471 return kvm_vm_ioctl(kvm_state, KVM_SET_GSI_ROUTING, kvm->irq_routes);
1472 #else
1473 return -ENOSYS;
1474 #endif
1477 int kvm_get_irq_route_gsi(kvm_context_t kvm)
1479 int i, bit;
1480 uint32_t *buf = kvm->used_gsi_bitmap;
1482 /* Return the lowest unused GSI in the bitmap */
1483 for (i = 0; i < kvm->max_gsi / 32; i++) {
1484 bit = ffs(~buf[i]);
1485 if (!bit)
1486 continue;
1488 return bit - 1 + i * 32;
1491 return -ENOSPC;
1494 #ifdef KVM_CAP_DEVICE_MSIX
1495 int kvm_assign_set_msix_nr(kvm_context_t kvm,
1496 struct kvm_assigned_msix_nr *msix_nr)
1498 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
1501 int kvm_assign_set_msix_entry(kvm_context_t kvm,
1502 struct kvm_assigned_msix_entry *entry)
1504 return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
1506 #endif
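/* irqfd support: kvm_irqfd() allocates an eventfd and binds it to @gsi with
 * the KVM_IRQFD ioctl, so that a write to the returned file descriptor
 * injects the corresponding interrupt entirely in the kernel. */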
1508 #if defined(KVM_CAP_IRQFD) && defined(CONFIG_eventfd)
1510 #include <sys/eventfd.h>
1512 static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
1514 struct kvm_irqfd data = {
1515 .fd = fd,
1516 .gsi = gsi,
1517 .flags = flags,
1520 return kvm_vm_ioctl(kvm_state, KVM_IRQFD, &data);
1523 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1525 int r;
1526 int fd;
1528 if (!kvm_check_extension(kvm_state, KVM_CAP_IRQFD))
1529 return -ENOENT;
1531 fd = eventfd(0, 0);
1532 if (fd < 0)
1533 return -errno;
1535 r = _kvm_irqfd(kvm, fd, gsi, 0);
1536 if (r < 0) {
1537 close(fd);
1538 return -errno;
1541 return fd;
1544 #else /* KVM_CAP_IRQFD */
1546 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1548 return -ENOSYS;
1551 #endif /* KVM_CAP_IRQFD */
1552 static inline unsigned long kvm_get_thread_id(void)
1554 return syscall(SYS_gettid);
1557 static void qemu_cond_wait(pthread_cond_t *cond)
1559 CPUState *env = cpu_single_env;
1560 static const struct timespec ts = {
1561 .tv_sec = 0,
1562 .tv_nsec = 100000,
1565 pthread_cond_timedwait(cond, &qemu_mutex, &ts);
1566 cpu_single_env = env;
1569 static void sig_ipi_handler(int n)
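/* on_vcpu(): run func(data) on the vcpu thread that owns @env.  If called
 * from that thread the function runs immediately; otherwise a work item is
 * queued on the target cpu, the thread is kicked with SIG_IPI, and the
 * caller waits on qemu_work_cond until the item is marked done. */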
1573 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
1575 struct qemu_work_item wi;
1577 if (env == current_env) {
1578 func(data);
1579 return;
1582 wi.func = func;
1583 wi.data = data;
1584 if (!env->kvm_cpu_state.queued_work_first)
1585 env->kvm_cpu_state.queued_work_first = &wi;
1586 else
1587 env->kvm_cpu_state.queued_work_last->next = &wi;
1588 env->kvm_cpu_state.queued_work_last = &wi;
1589 wi.next = NULL;
1590 wi.done = false;
1592 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1593 while (!wi.done)
1594 qemu_cond_wait(&qemu_work_cond);
1597 static void inject_interrupt(void *data)
1599 cpu_interrupt(current_env, (long)data);
1602 void kvm_inject_interrupt(CPUState *env, int mask)
1604 on_vcpu(env, inject_interrupt, (void *)(long)mask);
1607 void kvm_update_interrupt_request(CPUState *env)
1609 int signal = 0;
1611 if (env) {
1612 if (!current_env || !current_env->created)
1613 signal = 1;
1615 /* Testing for created here is really redundant */
1617 if (current_env && current_env->created &&
1618 env != current_env && !env->kvm_cpu_state.signalled)
1619 signal = 1;
1621 if (signal) {
1622 env->kvm_cpu_state.signalled = 1;
1623 if (env->kvm_cpu_state.thread)
1624 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1629 static void kvm_do_load_registers(void *_env)
1631 CPUState *env = _env;
1633 kvm_arch_load_regs(env);
1636 void kvm_load_registers(CPUState *env)
1638 if (kvm_enabled() && qemu_system_ready)
1639 on_vcpu(env, kvm_do_load_registers, env);
1642 static void kvm_do_save_registers(void *_env)
1644 CPUState *env = _env;
1646 kvm_arch_save_regs(env);
1649 void kvm_save_registers(CPUState *env)
1651 if (kvm_enabled())
1652 on_vcpu(env, kvm_do_save_registers, env);
1655 static void kvm_do_load_mpstate(void *_env)
1657 CPUState *env = _env;
1659 kvm_arch_load_mpstate(env);
1662 void kvm_load_mpstate(CPUState *env)
1664 if (kvm_enabled() && qemu_system_ready)
1665 on_vcpu(env, kvm_do_load_mpstate, env);
1668 static void kvm_do_save_mpstate(void *_env)
1670 CPUState *env = _env;
1672 kvm_arch_save_mpstate(env);
1673 env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
1676 void kvm_save_mpstate(CPUState *env)
1678 if (kvm_enabled())
1679 on_vcpu(env, kvm_do_save_mpstate, env);
1682 int kvm_cpu_exec(CPUState *env)
1684 int r;
1686 r = kvm_run(env->kvm_cpu_state.vcpu_ctx, env);
1687 if (r < 0) {
1688 printf("kvm_run returned %d\n", r);
1689 vm_stop(0);
1692 return 0;
1695 static int is_cpu_stopped(CPUState *env)
1697 return !vm_running || env->stopped;
1700 static void flush_queued_work(CPUState *env)
1702 struct qemu_work_item *wi;
1704 if (!env->kvm_cpu_state.queued_work_first)
1705 return;
1707 while ((wi = env->kvm_cpu_state.queued_work_first)) {
1708 env->kvm_cpu_state.queued_work_first = wi->next;
1709 wi->func(wi->data);
1710 wi->done = true;
1712 env->kvm_cpu_state.queued_work_last = NULL;
1713 pthread_cond_broadcast(&qemu_work_cond);
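/* kvm_main_loop_wait(): drop qemu_mutex and wait up to @timeout ms for a
 * SIG_IPI with sigtimedwait(), then retake the lock, flush queued work for
 * this vcpu and honour any pending stop request. */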
1716 static void kvm_main_loop_wait(CPUState *env, int timeout)
1718 struct timespec ts;
1719 int r, e;
1720 siginfo_t siginfo;
1721 sigset_t waitset;
1723 pthread_mutex_unlock(&qemu_mutex);
1725 ts.tv_sec = timeout / 1000;
1726 ts.tv_nsec = (timeout % 1000) * 1000000;
1727 sigemptyset(&waitset);
1728 sigaddset(&waitset, SIG_IPI);
1730 r = sigtimedwait(&waitset, &siginfo, &ts);
1731 e = errno;
1733 pthread_mutex_lock(&qemu_mutex);
1735 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
1736 printf("sigtimedwait: %s\n", strerror(e));
1737 exit(1);
1740 cpu_single_env = env;
1741 flush_queued_work(env);
1743 if (env->stop) {
1744 env->stop = 0;
1745 env->stopped = 1;
1746 pthread_cond_signal(&qemu_pause_cond);
1749 env->kvm_cpu_state.signalled = 0;
1752 static int all_threads_paused(void)
1754 CPUState *penv = first_cpu;
1756 while (penv) {
1757 if (penv->stop)
1758 return 0;
1759 penv = (CPUState *)penv->next_cpu;
1762 return 1;
1765 static void pause_all_threads(void)
1767 CPUState *penv = first_cpu;
1769 while (penv) {
1770 if (penv != cpu_single_env) {
1771 penv->stop = 1;
1772 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1773 } else {
1774 penv->stop = 0;
1775 penv->stopped = 1;
1776 cpu_exit(penv);
1778 penv = (CPUState *)penv->next_cpu;
1781 while (!all_threads_paused())
1782 qemu_cond_wait(&qemu_pause_cond);
1785 static void resume_all_threads(void)
1787 CPUState *penv = first_cpu;
1789 assert(!cpu_single_env);
1791 while (penv) {
1792 penv->stop = 0;
1793 penv->stopped = 0;
1794 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1795 penv = (CPUState *)penv->next_cpu;
1799 static void kvm_vm_state_change_handler(void *context, int running, int reason)
1801 if (running)
1802 resume_all_threads();
1803 else
1804 pause_all_threads();
1807 static void setup_kernel_sigmask(CPUState *env)
1809 sigset_t set;
1811 sigemptyset(&set);
1812 sigaddset(&set, SIGUSR2);
1813 sigaddset(&set, SIGIO);
1814 sigaddset(&set, SIGALRM);
1815 sigprocmask(SIG_BLOCK, &set, NULL);
1817 sigprocmask(SIG_BLOCK, NULL, &set);
1818 sigdelset(&set, SIG_IPI);
1820 kvm_set_signal_mask(env->kvm_cpu_state.vcpu_ctx, &set);
1823 static void qemu_kvm_system_reset(void)
1825 CPUState *penv = first_cpu;
1827 pause_all_threads();
1829 qemu_system_reset();
1831 while (penv) {
1832 kvm_arch_cpu_reset(penv);
1833 penv = (CPUState *)penv->next_cpu;
1836 resume_all_threads();
1839 static void process_irqchip_events(CPUState *env)
1841 kvm_arch_process_irqchip_events(env);
1842 if (kvm_arch_has_work(env))
1843 env->halted = 0;
1846 static int kvm_main_loop_cpu(CPUState *env)
1848 setup_kernel_sigmask(env);
1850 pthread_mutex_lock(&qemu_mutex);
1852 kvm_qemu_init_env(env);
1853 #ifdef TARGET_I386
1854 kvm_tpr_vcpu_start(env);
1855 #endif
1857 cpu_single_env = env;
1858 kvm_arch_load_regs(env);
1860 while (1) {
1861 int run_cpu = !is_cpu_stopped(env);
1862 if (run_cpu && !kvm_irqchip_in_kernel(kvm_context)) {
1863 process_irqchip_events(env);
1864 run_cpu = !env->halted;
1866 if (run_cpu) {
1867 kvm_main_loop_wait(env, 0);
1868 kvm_cpu_exec(env);
1869 } else {
1870 kvm_main_loop_wait(env, 1000);
1873 pthread_mutex_unlock(&qemu_mutex);
1874 return 0;
1877 static void *ap_main_loop(void *_env)
1879 CPUState *env = _env;
1880 sigset_t signals;
1881 struct ioperm_data *data = NULL;
1883 current_env = env;
1884 env->thread_id = kvm_get_thread_id();
1885 sigfillset(&signals);
1886 sigprocmask(SIG_BLOCK, &signals, NULL);
1887 env->kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(env, env->cpu_index);
1889 #ifdef USE_KVM_DEVICE_ASSIGNMENT
1890 /* do ioperm for io ports of assigned devices */
1891 LIST_FOREACH(data, &ioperm_head, entries)
1892 on_vcpu(env, kvm_arch_do_ioperm, data);
1893 #endif
1895 /* signal VCPU creation */
1896 pthread_mutex_lock(&qemu_mutex);
1897 current_env->created = 1;
1898 pthread_cond_signal(&qemu_vcpu_cond);
1900 /* and wait for machine initialization */
1901 while (!qemu_system_ready)
1902 qemu_cond_wait(&qemu_system_cond);
1903 pthread_mutex_unlock(&qemu_mutex);
1905 kvm_main_loop_cpu(env);
1906 return NULL;
1909 void kvm_init_vcpu(CPUState *env)
1911 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
1913 while (env->created == 0)
1914 qemu_cond_wait(&qemu_vcpu_cond);
1917 int kvm_vcpu_inited(CPUState *env)
1919 return env->created;
1922 #ifdef TARGET_I386
1923 void kvm_hpet_disable_kpit(void)
1925 struct kvm_pit_state2 ps2;
1927 kvm_get_pit2(kvm_context, &ps2);
1928 ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
1929 kvm_set_pit2(kvm_context, &ps2);
1932 void kvm_hpet_enable_kpit(void)
1934 struct kvm_pit_state2 ps2;
1936 kvm_get_pit2(kvm_context, &ps2);
1937 ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY;
1938 kvm_set_pit2(kvm_context, &ps2);
1940 #endif
1942 int kvm_init_ap(void)
1944 #ifdef TARGET_I386
1945 kvm_tpr_opt_setup();
1946 #endif
1947 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
1949 signal(SIG_IPI, sig_ipi_handler);
1950 return 0;
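/* qemu_kvm_notify_work(): wake the I/O thread by writing an 8-byte value to
 * io_thread_fd (an eventfd, or the write end of a pipe when eventfd is not
 * available). */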
1953 void qemu_kvm_notify_work(void)
1955 uint64_t value = 1;
1956 char buffer[8];
1957 size_t offset = 0;
1959 if (io_thread_fd == -1)
1960 return;
1962 memcpy(buffer, &value, sizeof(value));
1964 while (offset < 8) {
1965 ssize_t len;
1967 len = write(io_thread_fd, buffer + offset, 8 - offset);
1968 if (len == -1 && errno == EINTR)
1969 continue;
1971 /* In case we have a pipe, there is no reason to insist on writing
1972  * 8 bytes */
1974 if (len == -1 && errno == EAGAIN)
1975 break;
1977 if (len <= 0)
1978 break;
1980 offset += len;
1984 /* If we have signalfd, we mask out the signals we want to handle and then
1985 * use signalfd to listen for them. We rely on whatever the current signal
1986  * handler is to dispatch the signals when we receive them. */
1989 static void sigfd_handler(void *opaque)
1991 int fd = (unsigned long)opaque;
1992 struct qemu_signalfd_siginfo info;
1993 struct sigaction action;
1994 ssize_t len;
1996 while (1) {
1997 do {
1998 len = read(fd, &info, sizeof(info));
1999 } while (len == -1 && errno == EINTR);
2001 if (len == -1 && errno == EAGAIN)
2002 break;
2004 if (len != sizeof(info)) {
2005 printf("read from sigfd returned %zd: %m\n", len);
2006 return;
2009 sigaction(info.ssi_signo, NULL, &action);
2010 if (action.sa_handler)
2011 action.sa_handler(info.ssi_signo);
2016 /* Used to break IO thread out of select */
2017 static void io_thread_wakeup(void *opaque)
2019 int fd = (unsigned long)opaque;
2020 char buffer[4096];
2022 /* Drain the pipe/(eventfd) */
2023 while (1) {
2024 ssize_t len;
2026 len = read(fd, buffer, sizeof(buffer));
2027 if (len == -1 && errno == EINTR)
2028 continue;
2030 if (len <= 0)
2031 break;
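/* kvm_main_loop(): body of the I/O thread.  Set up the wakeup eventfd/pipe
 * and a signalfd for SIGIO/SIGALRM, release the vcpu threads by broadcasting
 * qemu_system_cond, then service main_loop_wait() and shutdown, powerdown,
 * reset and guest-debug requests until a shutdown is requested. */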
2035 int kvm_main_loop(void)
2037 int fds[2];
2038 sigset_t mask;
2039 int sigfd;
2041 io_thread = pthread_self();
2042 qemu_system_ready = 1;
2044 if (qemu_eventfd(fds) == -1) {
2045 fprintf(stderr, "failed to create eventfd\n");
2046 return -errno;
2049 fcntl(fds[0], F_SETFL, O_NONBLOCK);
2050 fcntl(fds[1], F_SETFL, O_NONBLOCK);
2052 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
2053 (void *)(unsigned long)fds[0]);
2055 io_thread_fd = fds[1];
2057 sigemptyset(&mask);
2058 sigaddset(&mask, SIGIO);
2059 sigaddset(&mask, SIGALRM);
2060 sigprocmask(SIG_BLOCK, &mask, NULL);
2062 sigfd = qemu_signalfd(&mask);
2063 if (sigfd == -1) {
2064 fprintf(stderr, "failed to create signalfd\n");
2065 return -errno;
2068 fcntl(sigfd, F_SETFL, O_NONBLOCK);
2070 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
2071 (void *)(unsigned long)sigfd);
2073 pthread_cond_broadcast(&qemu_system_cond);
2075 io_thread_sigfd = sigfd;
2076 cpu_single_env = NULL;
2078 while (1) {
2079 main_loop_wait(1000);
2080 if (qemu_shutdown_requested()) {
2081 if (qemu_no_shutdown()) {
2082 vm_stop(0);
2083 } else
2084 break;
2085 } else if (qemu_powerdown_requested())
2086 qemu_system_powerdown();
2087 else if (qemu_reset_requested())
2088 qemu_kvm_system_reset();
2089 else if (kvm_debug_cpu_requested) {
2090 gdb_set_stop_cpu(kvm_debug_cpu_requested);
2091 vm_stop(EXCP_DEBUG);
2092 kvm_debug_cpu_requested = NULL;
2096 pause_all_threads();
2097 pthread_mutex_unlock(&qemu_mutex);
2099 return 0;
2102 #ifdef TARGET_I386
2103 static int destroy_region_works = 0;
2104 #endif
2107 #if !defined(TARGET_I386)
2108 int kvm_arch_init_irq_routing(void)
2110 return 0;
2112 #endif
2114 extern int no_hpet;
2116 static int kvm_create_context()
2118 int r;
2120 if (!kvm_irqchip) {
2121 kvm_disable_irqchip_creation(kvm_context);
2123 if (!kvm_pit) {
2124 kvm_disable_pit_creation(kvm_context);
2126 if (kvm_create(kvm_context, 0, NULL) < 0) {
2127 kvm_finalize(kvm_state);
2128 return -1;
2130 r = kvm_arch_qemu_create_context();
2131 if (r < 0)
2132 kvm_finalize(kvm_state);
2133 if (kvm_pit && !kvm_pit_reinject) {
2134 if (kvm_reinject_control(kvm_context, 0)) {
2135 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
2136 return -1;
2139 #ifdef TARGET_I386
2140 destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
2141 #endif
2143 r = kvm_arch_init_irq_routing();
2144 if (r < 0) {
2145 return r;
2148 kvm_init_ap();
2149 if (kvm_irqchip) {
2150 if (!qemu_kvm_has_gsi_routing()) {
2151 irq0override = 0;
2152 #ifdef TARGET_I386
2153 /* if kernel can't do irq routing, interrupt source
2154  * override 0->2 cannot be set up as required by hpet,
2155  * so disable hpet. */
2157 no_hpet=1;
2158 } else if (!qemu_kvm_has_pit_state2()) {
2159 no_hpet=1;
2161 #else
2163 #endif
2166 return 0;
2169 #ifdef TARGET_I386
2170 static int must_use_aliases_source(target_phys_addr_t addr)
2172 if (destroy_region_works)
2173 return false;
2174 if (addr == 0xa0000 || addr == 0xa8000)
2175 return true;
2176 return false;
2179 static int must_use_aliases_target(target_phys_addr_t addr)
2181 if (destroy_region_works)
2182 return false;
2183 if (addr >= 0xe0000000 && addr < 0x100000000ull)
2184 return true;
2185 return false;
2188 static struct mapping {
2189 target_phys_addr_t phys;
2190 ram_addr_t ram;
2191 ram_addr_t len;
2192 } mappings[50];
2193 static int nr_mappings;
2195 static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
2197 struct mapping *p;
2199 for (p = mappings; p < mappings + nr_mappings; ++p) {
2200 if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
2201 return p;
2204 return NULL;
2207 static struct mapping *find_mapping(target_phys_addr_t start_addr)
2209 struct mapping *p;
2211 for (p = mappings; p < mappings + nr_mappings; ++p) {
2212 if (p->phys <= start_addr && start_addr < p->phys + p->len) {
2213 return p;
2216 return NULL;
2219 static void drop_mapping(target_phys_addr_t start_addr)
2221 struct mapping *p = find_mapping(start_addr);
2223 if (p)
2224 *p = mappings[--nr_mappings];
2226 #endif
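/* kvm_set_phys_mem(): map a guest physical range onto a KVM memory slot.
 * Non-RAM regions (ROM/MMIO) cause any overlapping slots to be unregistered;
 * on i386, regions that cannot be safely destroyed on old kernels are
 * handled through memory aliases instead. */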
2228 void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
2229 ram_addr_t phys_offset)
2231 int r = 0;
2232 unsigned long area_flags;
2233 #ifdef TARGET_I386
2234 struct mapping *p;
2235 #endif
2237 if (start_addr + size > phys_ram_size) {
2238 phys_ram_size = start_addr + size;
2241 phys_offset &= ~IO_MEM_ROM;
2242 area_flags = phys_offset & ~TARGET_PAGE_MASK;
2244 if (area_flags != IO_MEM_RAM) {
2245 #ifdef TARGET_I386
2246 if (must_use_aliases_source(start_addr)) {
2247 kvm_destroy_memory_alias(kvm_context, start_addr);
2248 return;
2250 if (must_use_aliases_target(start_addr))
2251 return;
2252 #endif
2253 while (size > 0) {
2254 p = find_mapping(start_addr);
2255 if (p) {
2256 kvm_unregister_memory_area(kvm_context, p->phys, p->len);
2257 drop_mapping(p->phys);
2259 start_addr += TARGET_PAGE_SIZE;
2260 if (size > TARGET_PAGE_SIZE) {
2261 size -= TARGET_PAGE_SIZE;
2262 } else {
2263 size = 0;
2266 return;
2269 r = kvm_is_containing_region(kvm_context, start_addr, size);
2270 if (r)
2271 return;
2273 if (area_flags >= TLB_MMIO)
2274 return;
2276 #ifdef TARGET_I386
2277 if (must_use_aliases_source(start_addr)) {
2278 p = find_ram_mapping(phys_offset);
2279 if (p) {
2280 kvm_create_memory_alias(kvm_context, start_addr, size,
2281 p->phys + (phys_offset - p->ram));
2283 return;
2285 #endif
2287 r = kvm_register_phys_mem(kvm_context, start_addr,
2288 qemu_get_ram_ptr(phys_offset),
2289 size, 0);
2290 if (r < 0) {
2291 printf("kvm_cpu_register_physical_memory: failed\n");
2292 exit(1);
2295 #ifdef TARGET_I386
2296 drop_mapping(start_addr);
2297 p = &mappings[nr_mappings++];
2298 p->phys = start_addr;
2299 p->ram = phys_offset;
2300 p->len = size;
2301 #endif
2303 return;
2306 int kvm_setup_guest_memory(void *area, unsigned long size)
2308 int ret = 0;
2310 #ifdef MADV_DONTFORK
2311 if (kvm_enabled() && !kvm_has_sync_mmu())
2312 ret = madvise(area, size, MADV_DONTFORK);
2313 #endif
2315 if (ret)
2316 perror ("madvise");
2318 return ret;
2321 int kvm_qemu_check_extension(int ext)
2323 return kvm_check_extension(kvm_state, ext);
2326 int kvm_qemu_init_env(CPUState *cenv)
2328 return kvm_arch_qemu_init_env(cenv);
2331 #ifdef KVM_CAP_SET_GUEST_DEBUG
2333 struct kvm_set_guest_debug_data {
2334 struct kvm_guest_debug dbg;
2335 int err;
2338 static void kvm_invoke_set_guest_debug(void *data)
2340 struct kvm_set_guest_debug_data *dbg_data = data;
2342 dbg_data->err = kvm_set_guest_debug(cpu_single_env->kvm_cpu_state.vcpu_ctx,
2343 &dbg_data->dbg);
2346 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
2348 struct kvm_set_guest_debug_data data;
2350 data.dbg.control = 0;
2351 if (env->singlestep_enabled)
2352 data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
2354 kvm_arch_update_guest_debug(env, &data.dbg);
2355 data.dbg.control |= reinject_trap;
2357 on_vcpu(env, kvm_invoke_set_guest_debug, &data);
2358 return data.err;
2361 #endif
2364 /* dirty pages logging */
2366 /* FIXME: use unsigned long pointer instead of unsigned char */
2367 unsigned char *kvm_dirty_bitmap = NULL;
2368 int kvm_physical_memory_set_dirty_tracking(int enable)
2370 int r = 0;
2372 if (!kvm_enabled())
2373 return 0;
2375 if (enable) {
2376 if (!kvm_dirty_bitmap) {
2377 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
2378 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
2379 if (kvm_dirty_bitmap == NULL) {
2380 perror("Failed to allocate dirty pages bitmap");
2381 r = -1;
2383 else {
2384 r = kvm_dirty_pages_log_enable_all(kvm_context);
2388 else {
2389 if (kvm_dirty_bitmap) {
2390 r = kvm_dirty_pages_log_reset(kvm_context);
2391 qemu_free(kvm_dirty_bitmap);
2392 kvm_dirty_bitmap = NULL;
2395 return r;
2398 /* get kvm's dirty pages bitmap and update qemu's */
2399 static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
2400 unsigned char *bitmap,
2401 unsigned long offset,
2402 unsigned long mem_size)
2404 unsigned int i, j, n=0;
2405 unsigned char c;
2406 unsigned long page_number, addr, addr1;
2407 ram_addr_t ram_addr;
2408 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
2411 /* bitmap-traveling is faster than memory-traveling (for addr...)
2412  * especially when most of the memory is not dirty. */
2414 for (i=0; i<len; i++) {
2415 c = bitmap[i];
2416 while (c>0) {
2417 j = ffsl(c) - 1;
2418 c &= ~(1u<<j);
2419 page_number = i * 8 + j;
2420 addr1 = page_number * TARGET_PAGE_SIZE;
2421 addr = offset + addr1;
2422 ram_addr = cpu_get_physical_page_desc(addr);
2423 cpu_physical_memory_set_dirty(ram_addr);
2424 n++;
2427 return 0;
2429 static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
2430 void *bitmap, void *opaque)
2432 return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
2436 /* get kvm's dirty pages bitmap and update qemu's
2437  * we only care about physical ram, which resides in slots 0 and 3 */
2439 int kvm_update_dirty_pages_log(void)
2441 int r = 0;
2444 r = kvm_get_dirty_pages_range(kvm_context, 0, -1UL,
2445 NULL,
2446 kvm_get_dirty_bitmap_cb);
2447 return r;
2450 void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
2451 int log)
2453 if (log)
2454 kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
2455 else {
2456 #ifdef TARGET_I386
2457 if (must_use_aliases_target(start))
2458 return;
2459 #endif
2460 kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
2464 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
2466 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
2467 unsigned int brsize = BITMAP_SIZE(ram_size);
2468 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
2469 unsigned int extra_bytes = (extra_pages +7)/8;
2470 unsigned int hole_start = BITMAP_SIZE(0xa0000);
2471 unsigned int hole_end = BITMAP_SIZE(0xc0000);
2473 memset(bitmap, 0xFF, brsize + extra_bytes);
2474 memset(bitmap + hole_start, 0, hole_end - hole_start);
2475 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
2477 return 0;
2480 #ifdef KVM_CAP_IRQCHIP
2482 int kvm_set_irq(int irq, int level, int *status)
2484 return kvm_set_irq_level(kvm_context, irq, level, status);
2487 #endif
2489 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
2491 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
2494 void kvm_mutex_unlock(void)
2496 assert(!cpu_single_env);
2497 pthread_mutex_unlock(&qemu_mutex);
2500 void kvm_mutex_lock(void)
2502 pthread_mutex_lock(&qemu_mutex);
2503 cpu_single_env = NULL;
2506 #ifdef USE_KVM_DEVICE_ASSIGNMENT
2507 void kvm_add_ioperm_data(struct ioperm_data *data)
2509 LIST_INSERT_HEAD(&ioperm_head, data, entries);
2512 void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
2514 struct ioperm_data *data;
2516 data = LIST_FIRST(&ioperm_head);
2517 while (data) {
2518 struct ioperm_data *next = LIST_NEXT(data, entries);
2520 if (data->start_port == start_port && data->num == num) {
2521 LIST_REMOVE(data, entries);
2522 qemu_free(data);
2525 data = next;
2529 void kvm_ioperm(CPUState *env, void *data)
2531 if (kvm_enabled() && qemu_system_ready)
2532 on_vcpu(env, kvm_arch_do_ioperm, data);
2535 #endif
2537 int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
2539 #ifndef TARGET_IA64
2541 #ifdef TARGET_I386
2542 if (must_use_aliases_source(start_addr))
2543 return 0;
2544 #endif
2546 kvm_get_dirty_pages_range(kvm_context, start_addr, end_addr - start_addr,
2547 NULL, kvm_get_dirty_bitmap_cb);
2548 #endif
2549 return 0;
2552 int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
2554 #ifdef TARGET_I386
2555 if (must_use_aliases_source(phys_addr))
2556 return 0;
2557 #endif
2559 #ifndef TARGET_IA64
2560 kvm_qemu_log_memory(phys_addr, len, 1);
2561 #endif
2562 return 0;
2565 int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
2567 #ifdef TARGET_I386
2568 if (must_use_aliases_source(phys_addr))
2569 return 0;
2570 #endif
2572 #ifndef TARGET_IA64
2573 kvm_qemu_log_memory(phys_addr, len, 0);
2574 #endif
2575 return 0;
2578 int kvm_set_boot_cpu_id(uint32_t id)
2580 return kvm_set_boot_vcpu_id(kvm_context, id);
2583 #ifdef TARGET_I386
2584 #ifdef KVM_CAP_MCE
2585 struct kvm_x86_mce_data
2587 CPUState *env;
2588 struct kvm_x86_mce *mce;
2591 static void kvm_do_inject_x86_mce(void *_data)
2593 struct kvm_x86_mce_data *data = _data;
2594 int r;
2596 r = kvm_set_mce(data->env->kvm_cpu_state.vcpu_ctx, data->mce);
2597 if (r < 0)
2598 perror("kvm_set_mce FAILED");
2600 #endif
2602 void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
2603 uint64_t mcg_status, uint64_t addr, uint64_t misc)
2605 #ifdef KVM_CAP_MCE
2606 struct kvm_x86_mce mce = {
2607 .bank = bank,
2608 .status = status,
2609 .mcg_status = mcg_status,
2610 .addr = addr,
2611 .misc = misc,
2613 struct kvm_x86_mce_data data = {
2614 .env = cenv,
2615 .mce = &mce,
2618 on_vcpu(cenv, kvm_do_inject_x86_mce, &data);
2619 #endif
2621 #endif