Retrieve mp state info in cpu_synchronize_state()
[qemu-kvm/fedora.git] / qemu-kvm.c
blob 10d44a6f51963ca9690b3a5bba9bad0202f6b9a9
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
21 #include "qemu-kvm.h"
22 #include "libkvm-all.h"
23 #include "libkvm.h"
25 #include <pthread.h>
26 #include <sys/utsname.h>
27 #include <sys/syscall.h>
28 #include <sys/mman.h>
29 #include <sys/ioctl.h>
31 #define false 0
32 #define true 1
34 #define EXPECTED_KVM_API_VERSION 12
36 #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
37 #error libkvm: userspace and kernel version mismatch
38 #endif
40 int kvm_allowed = 1;
41 int kvm_irqchip = 1;
42 int kvm_pit = 1;
43 int kvm_pit_reinject = 1;
44 int kvm_nested = 0;
45 kvm_context_t kvm_context;
47 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
48 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
49 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
50 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
51 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
52 __thread CPUState *current_env;
54 static int qemu_system_ready;
56 #define SIG_IPI (SIGRTMIN+4)
58 pthread_t io_thread;
59 static int io_thread_fd = -1;
60 static int io_thread_sigfd = -1;
62 static CPUState *kvm_debug_cpu_requested;
64 static uint64_t phys_ram_size;
66 /* The list of ioperm_data */
67 static LIST_HEAD(, ioperm_data) ioperm_head;
69 //#define DEBUG_MEMREG
70 #ifdef DEBUG_MEMREG
71 #define DPRINTF(fmt, args...) \
72 do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0)
73 #else
74 #define DPRINTF(fmt, args...) do {} while (0)
75 #endif
77 #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
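/*
 * ALIGN(x, y) rounds x up to the next multiple of y, where y must be a
 * power of two; e.g. ALIGN(33, 32) == 64 and ALIGN(64, 32) == 64.  It is
 * used below to size the GSI bitmap in whole 32-bit words.
 */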
79 int kvm_abi = EXPECTED_KVM_API_VERSION;
80 int kvm_page_size;
82 static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
84 uint32_t *bitmap = kvm->used_gsi_bitmap;
86 if (gsi < kvm->max_gsi)
87 bitmap[gsi / 32] |= 1U << (gsi % 32);
88 else
89 DPRINTF("Invalid GSI %d\n", gsi);
92 static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
94 uint32_t *bitmap = kvm->used_gsi_bitmap;
96 if (gsi < kvm->max_gsi)
97 bitmap[gsi / 32] &= ~(1U << (gsi % 32));
98 else
99 DPRINTF("Invalid GSI %d\n", gsi);
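/*
 * set_gsi()/clear_gsi() maintain kvm->used_gsi_bitmap: one bit per global
 * system interrupt, packed 32 per uint32_t word.  kvm_get_irq_route_gsi()
 * further down scans this bitmap with ffs() to hand out the lowest free GSI.
 */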
102 struct slot_info {
103 unsigned long phys_addr;
104 unsigned long len;
105 unsigned long userspace_addr;
106 unsigned flags;
107 int logging_count;
110 struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
112 static void init_slots(void)
114 int i;
116 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
117 slots[i].len = 0;
120 static int get_free_slot(kvm_context_t kvm)
122 int i;
123 int tss_ext;
125 #if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
126 tss_ext = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
127 #else
128 tss_ext = 0;
129 #endif
132 * on older kernels where the set tss ioctl is not supported we must save
133 * slot 0 to hold the extended memory, as the vmx will use the last 3
134 * pages of this slot.
136 if (tss_ext > 0)
137 i = 0;
138 else
139 i = 1;
141 for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
142 if (!slots[i].len)
143 return i;
144 return -1;
147 static void register_slot(int slot, unsigned long phys_addr, unsigned long len,
148 unsigned long userspace_addr, unsigned flags)
150 slots[slot].phys_addr = phys_addr;
151 slots[slot].len = len;
152 slots[slot].userspace_addr = userspace_addr;
153 slots[slot].flags = flags;
156 static void free_slot(int slot)
158 slots[slot].len = 0;
159 slots[slot].logging_count = 0;
162 static int get_slot(unsigned long phys_addr)
164 int i;
166 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) {
167 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
168 (slots[i].phys_addr + slots[i].len-1) >= phys_addr)
169 return i;
171 return -1;
174 /* Returns -1 if this slot is not totally contained on any other,
175 * and the number of the slot otherwise */
176 static int get_container_slot(uint64_t phys_addr, unsigned long size)
178 int i;
180 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i)
181 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
182 (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
183 return i;
184 return -1;
187 int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size)
189 int slot = get_container_slot(phys_addr, size);
190 if (slot == -1)
191 return 0;
192 return 1;
196 * dirty pages logging control
198 static int kvm_dirty_pages_log_change(kvm_context_t kvm,
199 unsigned long phys_addr,
200 unsigned flags,
201 unsigned mask)
203 int r = -1;
204 int slot = get_slot(phys_addr);
206 if (slot == -1) {
207 fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
208 return 1;
211 flags = (slots[slot].flags & ~mask) | flags;
212 if (flags == slots[slot].flags)
213 return 0;
214 slots[slot].flags = flags;
217 struct kvm_userspace_memory_region mem = {
218 .slot = slot,
219 .memory_size = slots[slot].len,
220 .guest_phys_addr = slots[slot].phys_addr,
221 .userspace_addr = slots[slot].userspace_addr,
222 .flags = slots[slot].flags,
226 DPRINTF("slot %d start %llx len %llx flags %x\n",
227 mem.slot,
228 mem.guest_phys_addr,
229 mem.memory_size,
230 mem.flags);
231 r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
232 if (r == -1)
233 fprintf(stderr, "%s: %m\n", __FUNCTION__);
235 return r;
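/*
 * kvm_dirty_pages_log_change() computes the new slot flags as
 * (old_flags & ~mask) | flags and only issues KVM_SET_USER_MEMORY_REGION
 * when that actually changes something.  The enable path below passes
 * flags = mask = KVM_MEM_LOG_DIRTY_PAGES; the disable path passes flags = 0
 * with the same mask, clearing just the dirty-logging bit.
 */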
238 static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
239 int (*change)(kvm_context_t kvm,
240 uint64_t start,
241 uint64_t len))
243 int i, r;
245 for (i=r=0; i<KVM_MAX_NUM_MEM_REGIONS && r==0; i++) {
246 if (slots[i].len)
247 r = change(kvm, slots[i].phys_addr, slots[i].len);
249 return r;
252 int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm,
253 uint64_t phys_addr,
254 uint64_t len)
256 int slot = get_slot(phys_addr);
258 DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len);
259 if (slot == -1) {
260 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
261 return -EINVAL;
264 if (slots[slot].logging_count++)
265 return 0;
267 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
268 KVM_MEM_LOG_DIRTY_PAGES,
269 KVM_MEM_LOG_DIRTY_PAGES);
272 int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm,
273 uint64_t phys_addr,
274 uint64_t len)
276 int slot = get_slot(phys_addr);
278 if (slot == -1) {
279 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
280 return -EINVAL;
283 if (--slots[slot].logging_count)
284 return 0;
286 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
287 0,
288 KVM_MEM_LOG_DIRTY_PAGES);
292 * Enable dirty page logging for all memory regions
294 int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
296 if (kvm->dirty_pages_log_all)
297 return 0;
298 kvm->dirty_pages_log_all = 1;
299 return kvm_dirty_pages_log_change_all(kvm,
300 kvm_dirty_pages_log_enable_slot);
304 * Enable dirty page logging only for memory regions that were created with
305 * dirty logging enabled (disable for all other memory regions).
307 int kvm_dirty_pages_log_reset(kvm_context_t kvm)
309 if (!kvm->dirty_pages_log_all)
310 return 0;
311 kvm->dirty_pages_log_all = 0;
312 return kvm_dirty_pages_log_change_all(kvm,
313 kvm_dirty_pages_log_disable_slot);
317 kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
318 void *opaque)
320 int fd;
321 kvm_context_t kvm;
322 int r, gsi_count;
324 fd = open("/dev/kvm", O_RDWR);
325 if (fd == -1) {
326 perror("open /dev/kvm");
327 return NULL;
329 r = ioctl(fd, KVM_GET_API_VERSION, 0);
330 if (r == -1) {
331 fprintf(stderr, "kvm kernel version too old: "
332 "KVM_GET_API_VERSION ioctl not supported\n");
333 goto out_close;
335 if (r < EXPECTED_KVM_API_VERSION) {
336 fprintf(stderr, "kvm kernel version too old: "
337 "We expect API version %d or newer, but got "
338 "version %d\n",
339 EXPECTED_KVM_API_VERSION, r);
340 goto out_close;
342 if (r > EXPECTED_KVM_API_VERSION) {
343 fprintf(stderr, "kvm userspace version too old\n");
344 goto out_close;
346 kvm_abi = r;
347 kvm_page_size = getpagesize();
348 kvm = malloc(sizeof(*kvm));
349 if (kvm == NULL)
350 goto out_close;
351 memset(kvm, 0, sizeof(*kvm));
352 kvm->fd = fd;
353 kvm->vm_fd = -1;
354 kvm->callbacks = callbacks;
355 kvm->opaque = opaque;
356 kvm->dirty_pages_log_all = 0;
357 kvm->no_irqchip_creation = 0;
358 kvm->no_pit_creation = 0;
360 gsi_count = kvm_get_gsi_count(kvm);
361 if (gsi_count > 0) {
362 int gsi_bits, i;
364 /* Round up so we can search ints using ffs */
365 gsi_bits = ALIGN(gsi_count, 32);
366 kvm->used_gsi_bitmap = malloc(gsi_bits / 8);
367 if (!kvm->used_gsi_bitmap)
368 goto out_close;
369 memset(kvm->used_gsi_bitmap, 0, gsi_bits / 8);
370 kvm->max_gsi = gsi_bits;
372 /* Mark any over-allocated bits as already in use */
373 for (i = gsi_count; i < gsi_bits; i++)
374 set_gsi(kvm, i);
377 return kvm;
378 out_close:
379 close(fd);
380 return NULL;
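/*
 * Typical bring-up, roughly as done by kvm_qemu_init() and
 * kvm_qemu_create_context() near the end of this file (illustrative sketch
 * only: error handling omitted, and the real code passes env->cpu_index
 * rather than a fixed id):
 *
 *     kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
 *     kvm_create(kvm_context, 0, NULL);        // VM fd, slots, irqchip/PIT
 *     vcpu = kvm_create_vcpu(kvm_context, 0);  // one context per vcpu thread
 */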
383 void kvm_finalize(kvm_context_t kvm)
385 /* FIXME
386 if (kvm->vcpu_fd[0] != -1)
387 close(kvm->vcpu_fd[0]);
388 if (kvm->vm_fd != -1)
389 close(kvm->vm_fd);
391 close(kvm->fd);
392 free(kvm);
395 void kvm_disable_irqchip_creation(kvm_context_t kvm)
397 kvm->no_irqchip_creation = 1;
400 void kvm_disable_pit_creation(kvm_context_t kvm)
402 kvm->no_pit_creation = 1;
405 kvm_vcpu_context_t kvm_create_vcpu(kvm_context_t kvm, int id)
407 long mmap_size;
408 int r;
409 kvm_vcpu_context_t vcpu_ctx = malloc(sizeof(struct kvm_vcpu_context));
411 if (!vcpu_ctx) {
412 errno = ENOMEM;
413 return NULL;
416 vcpu_ctx->kvm = kvm;
417 vcpu_ctx->id = id;
419 r = ioctl(kvm->vm_fd, KVM_CREATE_VCPU, id);
420 if (r == -1) {
421 fprintf(stderr, "kvm_create_vcpu: %m\n");
422 goto err;
424 vcpu_ctx->fd = r;
425 mmap_size = ioctl(kvm->fd, KVM_GET_VCPU_MMAP_SIZE, 0);
426 if (mmap_size == -1) {
427 fprintf(stderr, "get vcpu mmap size: %m\n");
428 goto err_fd;
430 vcpu_ctx->run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED,
431 vcpu_ctx->fd, 0);
432 if (vcpu_ctx->run == MAP_FAILED) {
433 fprintf(stderr, "mmap vcpu area: %m\n");
434 goto err_fd;
436 return vcpu_ctx;
437 err_fd:
438 close(vcpu_ctx->fd);
439 err:
440 free(vcpu_ctx);
441 return NULL;
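/*
 * The vcpu's struct kvm_run is mmap()ed from the vcpu fd and shared with the
 * kernel: after each KVM_RUN ioctl the exit reason and its payload (run->io,
 * run->mmio, ...) are read straight out of this area by kvm_run() and the
 * handle_io()/handle_mmio() helpers below.
 */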
444 int kvm_create_vm(kvm_context_t kvm)
446 int fd = kvm->fd;
448 #ifdef KVM_CAP_IRQ_ROUTING
449 kvm->irq_routes = malloc(sizeof(*kvm->irq_routes));
450 if (!kvm->irq_routes)
451 return -ENOMEM;
452 memset(kvm->irq_routes, 0, sizeof(*kvm->irq_routes));
453 kvm->nr_allocated_irq_routes = 0;
454 #endif
456 fd = ioctl(fd, KVM_CREATE_VM, 0);
457 if (fd == -1) {
458 fprintf(stderr, "kvm_create_vm: %m\n");
459 return -1;
461 kvm->vm_fd = fd;
462 return 0;
465 static int kvm_create_default_phys_mem(kvm_context_t kvm,
466 unsigned long phys_mem_bytes,
467 void **vm_mem)
469 #ifdef KVM_CAP_USER_MEMORY
470 int r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
471 if (r > 0)
472 return 0;
473 fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
474 #else
475 #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
476 #endif
477 return -1;
480 int kvm_check_extension(kvm_context_t kvm, int ext)
482 int ret;
484 ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, ext);
485 if (ret > 0)
486 return ret;
487 return 0;
490 void kvm_create_irqchip(kvm_context_t kvm)
492 int r;
494 kvm->irqchip_in_kernel = 0;
495 #ifdef KVM_CAP_IRQCHIP
496 if (!kvm->no_irqchip_creation) {
497 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
498 if (r > 0) { /* kernel irqchip supported */
499 r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
500 if (r >= 0) {
501 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
502 #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
503 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
504 KVM_CAP_IRQ_INJECT_STATUS);
505 if (r > 0)
506 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
507 #endif
508 kvm->irqchip_in_kernel = 1;
510 else
511 fprintf(stderr, "Create kernel PIC irqchip failed\n");
514 #endif
517 int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
519 int r;
521 r = kvm_create_vm(kvm);
522 if (r < 0)
523 return r;
524 r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
525 if (r < 0)
526 return r;
527 init_slots();
528 r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
529 if (r < 0)
530 return r;
531 kvm_create_irqchip(kvm);
533 return 0;
537 void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start,
538 unsigned long len, int log, int writable)
540 int r;
541 int prot = PROT_READ;
542 void *ptr;
543 struct kvm_userspace_memory_region memory = {
544 .memory_size = len,
545 .guest_phys_addr = phys_start,
546 .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
549 if (writable)
550 prot |= PROT_WRITE;
552 #if !defined(__s390__)
553 ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
554 #else
555 ptr = mmap(LIBKVM_S390_ORIGIN, len, prot | PROT_EXEC,
556 MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
557 #endif
558 if (ptr == MAP_FAILED) {
559 fprintf(stderr, "%s: %s", __func__, strerror(errno));
560 return 0;
563 memset(ptr, 0, len);
565 memory.userspace_addr = (unsigned long)ptr;
566 memory.slot = get_free_slot(kvm);
567 DPRINTF("slot %d start %llx len %llx flags %x\n",
568 memory.slot,
569 memory.guest_phys_addr,
570 memory.memory_size,
571 memory.flags);
572 r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
573 if (r == -1) {
574 fprintf(stderr, "%s: %s", __func__, strerror(errno));
575 return 0;
577 register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
578 memory.userspace_addr, memory.flags);
580 return ptr;
583 int kvm_register_phys_mem(kvm_context_t kvm,
584 unsigned long phys_start, void *userspace_addr,
585 unsigned long len, int log)
588 struct kvm_userspace_memory_region memory = {
589 .memory_size = len,
590 .guest_phys_addr = phys_start,
591 .userspace_addr = (unsigned long)(intptr_t)userspace_addr,
592 .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
594 int r;
596 memory.slot = get_free_slot(kvm);
597 DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n",
598 memory.guest_phys_addr, memory.memory_size,
599 memory.userspace_addr, memory.slot, memory.flags);
600 r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
601 if (r == -1) {
602 fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(errno));
603 return -1;
605 register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
606 memory.userspace_addr, memory.flags);
607 return 0;
611 /* destroy/free a whole slot.
612 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
614 void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
615 unsigned long len)
617 int slot;
618 int r;
619 struct kvm_userspace_memory_region memory = {
620 .memory_size = 0,
621 .guest_phys_addr = phys_start,
622 .userspace_addr = 0,
623 .flags = 0,
626 slot = get_slot(phys_start);
628 if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
629 fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
630 __FUNCTION__, slot);
631 return;
633 if (phys_start != slots[slot].phys_addr) {
634 fprintf(stderr,
635 "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
636 __FUNCTION__, phys_start, slots[slot].phys_addr);
637 phys_start = slots[slot].phys_addr;
640 memory.slot = slot;
641 DPRINTF("slot %d start %llx len %llx flags %x\n",
642 memory.slot,
643 memory.guest_phys_addr,
644 memory.memory_size,
645 memory.flags);
646 r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
647 if (r == -1) {
648 fprintf(stderr, "destroy_userspace_phys_mem: %s",
649 strerror(errno));
650 return;
653 free_slot(memory.slot);
656 void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size)
659 int slot = get_container_slot(phys_addr, size);
661 if (slot != -1) {
662 DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size);
663 kvm_destroy_phys_mem(kvm, phys_addr, size);
664 return;
668 static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
670 int r;
671 struct kvm_dirty_log log = {
672 .slot = slot,
675 log.dirty_bitmap = buf;
677 r = ioctl(kvm->vm_fd, ioctl_num, &log);
678 if (r == -1)
679 return -errno;
680 return 0;
683 int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
685 int slot;
687 slot = get_slot(phys_addr);
688 return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
691 int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
692 unsigned long len, void *buf, void *opaque,
693 int (*cb)(unsigned long start, unsigned long len,
694 void*bitmap, void *opaque))
696 int i;
697 int r;
698 unsigned long end_addr = phys_addr + len;
700 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
701 if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr)
702 && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) {
703 r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
704 if (r)
705 return r;
706 r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
707 if (r)
708 return r;
711 return 0;
714 #ifdef KVM_CAP_IRQCHIP
716 int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
718 struct kvm_irq_level event;
719 int r;
721 if (!kvm->irqchip_in_kernel)
722 return 0;
723 event.level = level;
724 event.irq = irq;
725 r = ioctl(kvm->vm_fd, kvm->irqchip_inject_ioctl, &event);
726 if (r == -1)
727 perror("kvm_set_irq_level");
729 if (status) {
730 #ifdef KVM_CAP_IRQ_INJECT_STATUS
731 *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ?
732 1 : event.status;
733 #else
734 *status = 1;
735 #endif
738 return 1;
741 int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
743 int r;
745 if (!kvm->irqchip_in_kernel)
746 return 0;
747 r = ioctl(kvm->vm_fd, KVM_GET_IRQCHIP, chip);
748 if (r == -1) {
749 r = -errno;
750 perror("kvm_get_irqchip\n");
752 return r;
755 int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
757 int r;
759 if (!kvm->irqchip_in_kernel)
760 return 0;
761 r = ioctl(kvm->vm_fd, KVM_SET_IRQCHIP, chip);
762 if (r == -1) {
763 r = -errno;
764 perror("kvm_set_irqchip\n");
766 return r;
769 #endif
771 static int handle_io(kvm_vcpu_context_t vcpu)
773 struct kvm_run *run = vcpu->run;
774 kvm_context_t kvm = vcpu->kvm;
775 uint16_t addr = run->io.port;
776 int r;
777 int i;
778 void *p = (void *)run + run->io.data_offset;
780 for (i = 0; i < run->io.count; ++i) {
781 switch (run->io.direction) {
782 case KVM_EXIT_IO_IN:
783 switch (run->io.size) {
784 case 1:
785 r = kvm->callbacks->inb(kvm->opaque, addr, p);
786 break;
787 case 2:
788 r = kvm->callbacks->inw(kvm->opaque, addr, p);
789 break;
790 case 4:
791 r = kvm->callbacks->inl(kvm->opaque, addr, p);
792 break;
793 default:
794 fprintf(stderr, "bad I/O size %d\n", run->io.size);
795 return -EMSGSIZE;
797 break;
798 case KVM_EXIT_IO_OUT:
799 switch (run->io.size) {
800 case 1:
801 r = kvm->callbacks->outb(kvm->opaque, addr,
802 *(uint8_t *)p);
803 break;
804 case 2:
805 r = kvm->callbacks->outw(kvm->opaque, addr,
806 *(uint16_t *)p);
807 break;
808 case 4:
809 r = kvm->callbacks->outl(kvm->opaque, addr,
810 *(uint32_t *)p);
811 break;
812 default:
813 fprintf(stderr, "bad I/O size %d\n", run->io.size);
814 return -EMSGSIZE;
816 break;
817 default:
818 fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
819 return -EPROTO;
822 p += run->io.size;
825 return 0;
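/*
 * For KVM_EXIT_IO the data lives inside the shared kvm_run page at
 * run->io.data_offset; run->io.count is greater than one for string I/O
 * (e.g. rep outs), which is why the loop above advances p by io.size on
 * every iteration.
 */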
828 int handle_debug(kvm_vcpu_context_t vcpu, void *env)
830 #ifdef KVM_CAP_SET_GUEST_DEBUG
831 struct kvm_run *run = vcpu->run;
832 kvm_context_t kvm = vcpu->kvm;
834 return kvm->callbacks->debug(kvm->opaque, env, &run->debug.arch);
835 #else
836 return 0;
837 #endif
840 int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
842 return ioctl(vcpu->fd, KVM_GET_REGS, regs);
845 int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
847 return ioctl(vcpu->fd, KVM_SET_REGS, regs);
850 int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
852 return ioctl(vcpu->fd, KVM_GET_FPU, fpu);
855 int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
857 return ioctl(vcpu->fd, KVM_SET_FPU, fpu);
860 int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
862 return ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
865 int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
867 return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
870 #ifdef KVM_CAP_MP_STATE
871 int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
873 int r;
875 r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
876 if (r > 0)
877 return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state);
878 return -ENOSYS;
881 int kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
883 int r;
885 r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
886 if (r > 0)
887 return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
888 return -ENOSYS;
890 #endif
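/*
 * kvm_get_mpstate()/kvm_set_mpstate() wrap KVM_GET_MP_STATE/KVM_SET_MP_STATE,
 * re-checking KVM_CAP_MP_STATE on every call.  kvm_save_mpstate() further
 * down uses the retrieved state to derive env->halted from
 * KVM_MP_STATE_HALTED; per the commit subject, this is the mp state info
 * picked up in cpu_synchronize_state().
 */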
892 static int handle_mmio(kvm_vcpu_context_t vcpu)
894 unsigned long addr = vcpu->run->mmio.phys_addr;
895 kvm_context_t kvm = vcpu->kvm;
896 struct kvm_run *kvm_run = vcpu->run;
897 void *data = kvm_run->mmio.data;
899 /* hack: Red Hat 7.1 generates these weird accesses. */
900 if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
901 return 0;
903 if (kvm_run->mmio.is_write)
904 return kvm->callbacks->mmio_write(kvm->opaque, addr, data,
905 kvm_run->mmio.len);
906 else
907 return kvm->callbacks->mmio_read(kvm->opaque, addr, data,
908 kvm_run->mmio.len);
911 int handle_io_window(kvm_context_t kvm)
913 return kvm->callbacks->io_window(kvm->opaque);
916 int handle_halt(kvm_vcpu_context_t vcpu)
918 return vcpu->kvm->callbacks->halt(vcpu->kvm->opaque, vcpu);
921 int handle_shutdown(kvm_context_t kvm, void *env)
923 return kvm->callbacks->shutdown(kvm->opaque, env);
926 int try_push_interrupts(kvm_context_t kvm)
928 return kvm->callbacks->try_push_interrupts(kvm->opaque);
931 static inline void push_nmi(kvm_context_t kvm)
933 #ifdef KVM_CAP_USER_NMI
934 kvm->callbacks->push_nmi(kvm->opaque);
935 #endif /* KVM_CAP_USER_NMI */
938 void post_kvm_run(kvm_context_t kvm, void *env)
940 kvm->callbacks->post_kvm_run(kvm->opaque, env);
943 int pre_kvm_run(kvm_context_t kvm, void *env)
945 return kvm->callbacks->pre_kvm_run(kvm->opaque, env);
948 int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu)
950 return vcpu->run->if_flag;
953 int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu)
955 return vcpu->run->ready_for_interrupt_injection;
958 int kvm_run(kvm_vcpu_context_t vcpu, void *env)
960 int r;
961 int fd = vcpu->fd;
962 struct kvm_run *run = vcpu->run;
963 kvm_context_t kvm = vcpu->kvm;
965 again:
966 push_nmi(kvm);
967 #if !defined(__s390__)
968 if (!kvm->irqchip_in_kernel)
969 run->request_interrupt_window = try_push_interrupts(kvm);
970 #endif
971 r = pre_kvm_run(kvm, env);
972 if (r)
973 return r;
974 r = ioctl(fd, KVM_RUN, 0);
976 if (r == -1 && errno != EINTR && errno != EAGAIN) {
977 r = -errno;
978 post_kvm_run(kvm, env);
979 fprintf(stderr, "kvm_run: %s\n", strerror(-r));
980 return r;
983 post_kvm_run(kvm, env);
985 #if defined(KVM_CAP_COALESCED_MMIO)
986 if (kvm->coalesced_mmio) {
987 struct kvm_coalesced_mmio_ring *ring = (void *)run +
988 kvm->coalesced_mmio * PAGE_SIZE;
989 while (ring->first != ring->last) {
990 kvm->callbacks->mmio_write(kvm->opaque,
991 ring->coalesced_mmio[ring->first].phys_addr,
992 &ring->coalesced_mmio[ring->first].data[0],
993 ring->coalesced_mmio[ring->first].len);
994 smp_wmb();
995 ring->first = (ring->first + 1) %
996 KVM_COALESCED_MMIO_MAX;
999 #endif
1001 #if !defined(__s390__)
1002 if (r == -1) {
1003 r = handle_io_window(kvm);
1004 goto more;
1006 #endif
1007 if (1) {
1008 switch (run->exit_reason) {
1009 case KVM_EXIT_UNKNOWN:
1010 r = kvm->callbacks->unhandled(kvm, vcpu,
1011 run->hw.hardware_exit_reason);
1012 break;
1013 case KVM_EXIT_FAIL_ENTRY:
1014 r = kvm->callbacks->unhandled(kvm, vcpu,
1015 run->fail_entry.hardware_entry_failure_reason);
1016 break;
1017 case KVM_EXIT_EXCEPTION:
1018 fprintf(stderr, "exception %d (%x)\n",
1019 run->ex.exception,
1020 run->ex.error_code);
1021 kvm_show_regs(vcpu);
1022 kvm_show_code(vcpu);
1023 abort();
1024 break;
1025 case KVM_EXIT_IO:
1026 r = handle_io(vcpu);
1027 break;
1028 case KVM_EXIT_DEBUG:
1029 r = handle_debug(vcpu, env);
1030 break;
1031 case KVM_EXIT_MMIO:
1032 r = handle_mmio(vcpu);
1033 break;
1034 case KVM_EXIT_HLT:
1035 r = handle_halt(vcpu);
1036 break;
1037 case KVM_EXIT_IRQ_WINDOW_OPEN:
1038 break;
1039 case KVM_EXIT_SHUTDOWN:
1040 r = handle_shutdown(kvm, env);
1041 break;
1042 #if defined(__s390__)
1043 case KVM_EXIT_S390_SIEIC:
1044 r = kvm->callbacks->s390_handle_intercept(kvm, vcpu,
1045 run);
1046 break;
1047 case KVM_EXIT_S390_RESET:
1048 r = kvm->callbacks->s390_handle_reset(kvm, vcpu, run);
1049 break;
1050 #endif
1051 default:
1052 if (kvm_arch_run(vcpu)) {
1053 fprintf(stderr, "unhandled vm exit: 0x%x\n",
1054 run->exit_reason);
1055 kvm_show_regs(vcpu);
1056 abort();
1058 break;
1061 more:
1062 if (!r)
1063 goto again;
1064 return r;
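/*
 * kvm_run() is the per-vcpu run loop: push pending NMIs/interrupts, issue
 * KVM_RUN, drain the coalesced-MMIO ring (a page located coalesced_mmio
 * pages past the kvm_run area), then dispatch on run->exit_reason.  Handlers
 * that return 0 loop back via "goto again"; a non-zero value is propagated
 * up to kvm_cpu_exec().
 */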
1067 int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq)
1069 struct kvm_interrupt intr;
1071 intr.irq = irq;
1072 return ioctl(vcpu->fd, KVM_INTERRUPT, &intr);
1075 #ifdef KVM_CAP_SET_GUEST_DEBUG
1076 int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg)
1078 return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg);
1080 #endif
1082 int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset)
1084 struct kvm_signal_mask *sigmask;
1085 int r;
1087 if (!sigset) {
1088 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL);
1089 if (r == -1)
1090 r = -errno;
1091 return r;
1093 sigmask = malloc(sizeof(*sigmask) + sizeof(*sigset));
1094 if (!sigmask)
1095 return -ENOMEM;
1097 sigmask->len = 8;
1098 memcpy(sigmask->sigset, sigset, sizeof(*sigset));
1099 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask);
1100 if (r == -1)
1101 r = -errno;
1102 free(sigmask);
1103 return r;
1106 int kvm_irqchip_in_kernel(kvm_context_t kvm)
1108 return kvm->irqchip_in_kernel;
1111 int kvm_pit_in_kernel(kvm_context_t kvm)
1113 return kvm->pit_in_kernel;
1116 int kvm_has_sync_mmu(void)
1118 int r = 0;
1119 #ifdef KVM_CAP_SYNC_MMU
1120 r = ioctl(kvm_context->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
1121 #endif
1122 return r;
1125 int kvm_inject_nmi(kvm_vcpu_context_t vcpu)
1127 #ifdef KVM_CAP_USER_NMI
1128 return ioctl(vcpu->fd, KVM_NMI);
1129 #else
1130 return -ENOSYS;
1131 #endif
1134 int kvm_init_coalesced_mmio(kvm_context_t kvm)
1136 int r = 0;
1137 kvm->coalesced_mmio = 0;
1138 #ifdef KVM_CAP_COALESCED_MMIO
1139 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
1140 if (r > 0) {
1141 kvm->coalesced_mmio = r;
1142 return 0;
1144 #endif
1145 return r;
1148 int kvm_register_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
1150 #ifdef KVM_CAP_COALESCED_MMIO
1151 struct kvm_coalesced_mmio_zone zone;
1152 int r;
1154 if (kvm->coalesced_mmio) {
1156 zone.addr = addr;
1157 zone.size = size;
1159 r = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
1160 if (r == -1) {
1161 perror("kvm_register_coalesced_mmio_zone");
1162 return -errno;
1164 return 0;
1166 #endif
1167 return -ENOSYS;
1170 int kvm_unregister_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
1172 #ifdef KVM_CAP_COALESCED_MMIO
1173 struct kvm_coalesced_mmio_zone zone;
1174 int r;
1176 if (kvm->coalesced_mmio) {
1178 zone.addr = addr;
1179 zone.size = size;
1181 r = ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone);
1182 if (r == -1) {
1183 perror("kvm_unregister_coalesced_mmio_zone");
1184 return -errno;
1186 DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size);
1187 return 0;
1189 #endif
1190 return -ENOSYS;
1193 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
1194 int kvm_assign_pci_device(kvm_context_t kvm,
1195 struct kvm_assigned_pci_dev *assigned_dev)
1197 int ret;
1199 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
1200 if (ret < 0)
1201 return -errno;
1203 return ret;
1206 static int kvm_old_assign_irq(kvm_context_t kvm,
1207 struct kvm_assigned_irq *assigned_irq)
1209 int ret;
1211 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq);
1212 if (ret < 0)
1213 return -errno;
1215 return ret;
1218 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
1219 int kvm_assign_irq(kvm_context_t kvm,
1220 struct kvm_assigned_irq *assigned_irq)
1222 int ret;
1224 ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
1225 if (ret > 0) {
1226 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_DEV_IRQ, assigned_irq);
1227 if (ret < 0)
1228 return -errno;
1229 return ret;
1232 return kvm_old_assign_irq(kvm, assigned_irq);
1235 int kvm_deassign_irq(kvm_context_t kvm,
1236 struct kvm_assigned_irq *assigned_irq)
1238 int ret;
1240 ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
1241 if (ret < 0)
1242 return -errno;
1244 return ret;
1246 #else
1247 int kvm_assign_irq(kvm_context_t kvm,
1248 struct kvm_assigned_irq *assigned_irq)
1250 return kvm_old_assign_irq(kvm, assigned_irq);
1252 #endif
1253 #endif
1255 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1256 int kvm_deassign_pci_device(kvm_context_t kvm,
1257 struct kvm_assigned_pci_dev *assigned_dev)
1259 int ret;
1261 ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
1262 if (ret < 0)
1263 return -errno;
1265 return ret;
1267 #endif
1269 int kvm_destroy_memory_region_works(kvm_context_t kvm)
1271 int ret = 0;
1273 #ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
1274 ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
1275 KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
1276 if (ret <= 0)
1277 ret = 0;
1278 #endif
1279 return ret;
1282 int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
1284 #ifdef KVM_CAP_REINJECT_CONTROL
1285 int r;
1286 struct kvm_reinject_control control;
1288 control.pit_reinject = pit_reinject;
1290 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
1291 if (r > 0) {
1292 r = ioctl(kvm->vm_fd, KVM_REINJECT_CONTROL, &control);
1293 if (r == -1)
1294 return -errno;
1295 return r;
1297 #endif
1298 return -ENOSYS;
1301 int kvm_has_gsi_routing(kvm_context_t kvm)
1303 int r = 0;
1305 #ifdef KVM_CAP_IRQ_ROUTING
1306 r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
1307 #endif
1308 return r;
1311 int kvm_get_gsi_count(kvm_context_t kvm)
1313 #ifdef KVM_CAP_IRQ_ROUTING
1314 return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
1315 #else
1316 return -EINVAL;
1317 #endif
1320 int kvm_clear_gsi_routes(kvm_context_t kvm)
1322 #ifdef KVM_CAP_IRQ_ROUTING
1323 kvm->irq_routes->nr = 0;
1324 return 0;
1325 #else
1326 return -EINVAL;
1327 #endif
1330 int kvm_add_routing_entry(kvm_context_t kvm,
1331 struct kvm_irq_routing_entry* entry)
1333 #ifdef KVM_CAP_IRQ_ROUTING
1334 struct kvm_irq_routing *z;
1335 struct kvm_irq_routing_entry *new;
1336 int n, size;
1338 if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
1339 n = kvm->nr_allocated_irq_routes * 2;
1340 if (n < 64)
1341 n = 64;
1342 size = sizeof(struct kvm_irq_routing);
1343 size += n * sizeof(*new);
1344 z = realloc(kvm->irq_routes, size);
1345 if (!z)
1346 return -ENOMEM;
1347 kvm->nr_allocated_irq_routes = n;
1348 kvm->irq_routes = z;
1350 n = kvm->irq_routes->nr++;
1351 new = &kvm->irq_routes->entries[n];
1352 memset(new, 0, sizeof(*new));
1353 new->gsi = entry->gsi;
1354 new->type = entry->type;
1355 new->flags = entry->flags;
1356 new->u = entry->u;
1358 set_gsi(kvm, entry->gsi);
1360 return 0;
1361 #else
1362 return -ENOSYS;
1363 #endif
1366 int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1368 #ifdef KVM_CAP_IRQ_ROUTING
1369 struct kvm_irq_routing_entry e;
1371 e.gsi = gsi;
1372 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1373 e.flags = 0;
1374 e.u.irqchip.irqchip = irqchip;
1375 e.u.irqchip.pin = pin;
1376 return kvm_add_routing_entry(kvm, &e);
1377 #else
1378 return -ENOSYS;
1379 #endif
1382 int kvm_del_routing_entry(kvm_context_t kvm,
1383 struct kvm_irq_routing_entry* entry)
1385 #ifdef KVM_CAP_IRQ_ROUTING
1386 struct kvm_irq_routing_entry *e, *p;
1387 int i, gsi, found = 0;
1389 gsi = entry->gsi;
1391 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1392 e = &kvm->irq_routes->entries[i];
1393 if (e->type == entry->type
1394 && e->gsi == gsi) {
1395 switch (e->type)
1397 case KVM_IRQ_ROUTING_IRQCHIP: {
1398 if (e->u.irqchip.irqchip ==
1399 entry->u.irqchip.irqchip
1400 && e->u.irqchip.pin ==
1401 entry->u.irqchip.pin) {
1402 p = &kvm->irq_routes->
1403 entries[--kvm->irq_routes->nr];
1404 *e = *p;
1405 found = 1;
1407 break;
1409 case KVM_IRQ_ROUTING_MSI: {
1410 if (e->u.msi.address_lo ==
1411 entry->u.msi.address_lo
1412 && e->u.msi.address_hi ==
1413 entry->u.msi.address_hi
1414 && e->u.msi.data == entry->u.msi.data) {
1415 p = &kvm->irq_routes->
1416 entries[--kvm->irq_routes->nr];
1417 *e = *p;
1418 found = 1;
1420 break;
1422 default:
1423 break;
1425 if (found) {
1426 /* If there are no other users of this GSI
1427 * mark it available in the bitmap */
1428 for (i = 0; i < kvm->irq_routes->nr; i++) {
1429 e = &kvm->irq_routes->entries[i];
1430 if (e->gsi == gsi)
1431 break;
1433 if (i == kvm->irq_routes->nr)
1434 clear_gsi(kvm, gsi);
1436 return 0;
1440 return -ESRCH;
1441 #else
1442 return -ENOSYS;
1443 #endif
1446 int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1448 #ifdef KVM_CAP_IRQ_ROUTING
1449 struct kvm_irq_routing_entry e;
1451 e.gsi = gsi;
1452 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1453 e.flags = 0;
1454 e.u.irqchip.irqchip = irqchip;
1455 e.u.irqchip.pin = pin;
1456 return kvm_del_routing_entry(kvm, &e);
1457 #else
1458 return -ENOSYS;
1459 #endif
1462 int kvm_commit_irq_routes(kvm_context_t kvm)
1464 #ifdef KVM_CAP_IRQ_ROUTING
1465 int r;
1467 kvm->irq_routes->flags = 0;
1468 r = ioctl(kvm->vm_fd, KVM_SET_GSI_ROUTING, kvm->irq_routes);
1469 if (r == -1)
1470 r = -errno;
1471 return r;
1472 #else
1473 return -ENOSYS;
1474 #endif
1477 int kvm_get_irq_route_gsi(kvm_context_t kvm)
1479 int i, bit;
1480 uint32_t *buf = kvm->used_gsi_bitmap;
1482 /* Return the lowest unused GSI in the bitmap */
1483 for (i = 0; i < kvm->max_gsi / 32; i++) {
1484 bit = ffs(~buf[i]);
1485 if (!bit)
1486 continue;
1488 return bit - 1 + i * 32;
1491 return -ENOSPC;
1494 #ifdef KVM_CAP_DEVICE_MSIX
1495 int kvm_assign_set_msix_nr(kvm_context_t kvm,
1496 struct kvm_assigned_msix_nr *msix_nr)
1498 int ret;
1500 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
1501 if (ret < 0)
1502 return -errno;
1504 return ret;
1507 int kvm_assign_set_msix_entry(kvm_context_t kvm,
1508 struct kvm_assigned_msix_entry *entry)
1510 int ret;
1512 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
1513 if (ret < 0)
1514 return -errno;
1516 return ret;
1518 #endif
1520 #if defined(KVM_CAP_IRQFD) && defined(CONFIG_eventfd)
1522 #include <sys/eventfd.h>
1524 static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
1526 int r;
1527 struct kvm_irqfd data = {
1528 .fd = fd,
1529 .gsi = gsi,
1530 .flags = flags,
1533 r = ioctl(kvm->vm_fd, KVM_IRQFD, &data);
1534 if (r == -1)
1535 r = -errno;
1536 return r;
1539 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1541 int r;
1542 int fd;
1544 if (!kvm_check_extension(kvm, KVM_CAP_IRQFD))
1545 return -ENOENT;
1547 fd = eventfd(0, 0);
1548 if (fd < 0)
1549 return -errno;
1551 r = _kvm_irqfd(kvm, fd, gsi, 0);
1552 if (r < 0) {
1553 close(fd);
1554 return -errno;
1557 return fd;
1560 #else /* KVM_CAP_IRQFD */
1562 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1564 return -ENOSYS;
1567 #endif /* KVM_CAP_IRQFD */
1568 static inline unsigned long kvm_get_thread_id(void)
1570 return syscall(SYS_gettid);
1573 static void qemu_cond_wait(pthread_cond_t *cond)
1575 CPUState *env = cpu_single_env;
1576 static const struct timespec ts = {
1577 .tv_sec = 0,
1578 .tv_nsec = 100000,
1581 pthread_cond_timedwait(cond, &qemu_mutex, &ts);
1582 cpu_single_env = env;
1585 static void sig_ipi_handler(int n)
1589 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
1591 struct qemu_work_item wi;
1593 if (env == current_env) {
1594 func(data);
1595 return;
1598 wi.func = func;
1599 wi.data = data;
1600 if (!env->kvm_cpu_state.queued_work_first)
1601 env->kvm_cpu_state.queued_work_first = &wi;
1602 else
1603 env->kvm_cpu_state.queued_work_last->next = &wi;
1604 env->kvm_cpu_state.queued_work_last = &wi;
1605 wi.next = NULL;
1606 wi.done = false;
1608 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1609 while (!wi.done)
1610 qemu_cond_wait(&qemu_work_cond);
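/*
 * on_vcpu() runs func on the thread that owns env: if the caller already is
 * that vcpu thread the function is invoked directly, otherwise a
 * qemu_work_item is queued, the target thread is kicked with SIG_IPI, and
 * the caller sleeps on qemu_work_cond until flush_queued_work() on the vcpu
 * thread has executed the item and broadcast completion.
 */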
1613 static void inject_interrupt(void *data)
1615 cpu_interrupt(current_env, (long)data);
1618 void kvm_inject_interrupt(CPUState *env, int mask)
1620 on_vcpu(env, inject_interrupt, (void *)(long)mask);
1623 void kvm_update_interrupt_request(CPUState *env)
1625 int signal = 0;
1627 if (env) {
1628 if (!current_env || !current_env->kvm_cpu_state.created)
1629 signal = 1;
1631 * Testing for created here is really redundant
1633 if (current_env && current_env->kvm_cpu_state.created &&
1634 env != current_env && !env->kvm_cpu_state.signalled)
1635 signal = 1;
1637 if (signal) {
1638 env->kvm_cpu_state.signalled = 1;
1639 if (env->kvm_cpu_state.thread)
1640 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1645 void kvm_update_after_sipi(CPUState *env)
1647 env->kvm_cpu_state.sipi_needed = 1;
1648 kvm_update_interrupt_request(env);
1651 void kvm_apic_init(CPUState *env)
1653 if (env->cpu_index != 0)
1654 env->kvm_cpu_state.init = 1;
1655 kvm_update_interrupt_request(env);
1658 #include <signal.h>
1660 static int kvm_try_push_interrupts(void *opaque)
1662 return kvm_arch_try_push_interrupts(opaque);
1665 static void kvm_post_run(void *opaque, void *data)
1667 CPUState *env = (CPUState *)data;
1669 pthread_mutex_lock(&qemu_mutex);
1670 kvm_arch_post_kvm_run(opaque, env);
1673 static int kvm_pre_run(void *opaque, void *data)
1675 CPUState *env = (CPUState *)data;
1677 kvm_arch_pre_kvm_run(opaque, env);
1679 if (env->exit_request)
1680 return 1;
1681 pthread_mutex_unlock(&qemu_mutex);
1682 return 0;
1685 static void kvm_do_load_registers(void *_env)
1687 CPUState *env = _env;
1689 kvm_arch_load_regs(env);
1692 void kvm_load_registers(CPUState *env)
1694 if (kvm_enabled() && qemu_system_ready)
1695 on_vcpu(env, kvm_do_load_registers, env);
1698 static void kvm_do_save_registers(void *_env)
1700 CPUState *env = _env;
1702 kvm_arch_save_regs(env);
1705 void kvm_save_registers(CPUState *env)
1707 if (kvm_enabled())
1708 on_vcpu(env, kvm_do_save_registers, env);
1711 static void kvm_do_load_mpstate(void *_env)
1713 CPUState *env = _env;
1715 kvm_arch_load_mpstate(env);
1718 void kvm_load_mpstate(CPUState *env)
1720 if (kvm_enabled() && qemu_system_ready)
1721 on_vcpu(env, kvm_do_load_mpstate, env);
1724 static void kvm_do_save_mpstate(void *_env)
1726 CPUState *env = _env;
1728 kvm_arch_save_mpstate(env);
1729 env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
1732 void kvm_save_mpstate(CPUState *env)
1734 if (kvm_enabled())
1735 on_vcpu(env, kvm_do_save_mpstate, env);
1738 int kvm_cpu_exec(CPUState *env)
1740 int r;
1742 r = kvm_run(env->kvm_cpu_state.vcpu_ctx, env);
1743 if (r < 0) {
1744 printf("kvm_run returned %d\n", r);
1745 vm_stop(0);
1748 return 0;
1751 static int has_work(CPUState *env)
1753 if (!vm_running || (env && env->kvm_cpu_state.stopped))
1754 return 0;
1755 if (kvm_irqchip_in_kernel(kvm_context))
1756 return 1;
1757 if (!env->halted)
1758 return 1;
1759 return kvm_arch_has_work(env);
1762 static void flush_queued_work(CPUState *env)
1764 struct qemu_work_item *wi;
1766 if (!env->kvm_cpu_state.queued_work_first)
1767 return;
1769 while ((wi = env->kvm_cpu_state.queued_work_first)) {
1770 env->kvm_cpu_state.queued_work_first = wi->next;
1771 wi->func(wi->data);
1772 wi->done = true;
1774 env->kvm_cpu_state.queued_work_last = NULL;
1775 pthread_cond_broadcast(&qemu_work_cond);
1778 static void kvm_main_loop_wait(CPUState *env, int timeout)
1780 struct timespec ts;
1781 int r, e;
1782 siginfo_t siginfo;
1783 sigset_t waitset;
1785 pthread_mutex_unlock(&qemu_mutex);
1787 ts.tv_sec = timeout / 1000;
1788 ts.tv_nsec = (timeout % 1000) * 1000000;
1789 sigemptyset(&waitset);
1790 sigaddset(&waitset, SIG_IPI);
1792 r = sigtimedwait(&waitset, &siginfo, &ts);
1793 e = errno;
1795 pthread_mutex_lock(&qemu_mutex);
1797 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
1798 printf("sigtimedwait: %s\n", strerror(e));
1799 exit(1);
1802 cpu_single_env = env;
1803 flush_queued_work(env);
1805 if (env->kvm_cpu_state.stop) {
1806 env->kvm_cpu_state.stop = 0;
1807 env->kvm_cpu_state.stopped = 1;
1808 pthread_cond_signal(&qemu_pause_cond);
1811 env->kvm_cpu_state.signalled = 0;
1814 static int all_threads_paused(void)
1816 CPUState *penv = first_cpu;
1818 while (penv) {
1819 if (penv->kvm_cpu_state.stop)
1820 return 0;
1821 penv = (CPUState *)penv->next_cpu;
1824 return 1;
1827 static void pause_all_threads(void)
1829 CPUState *penv = first_cpu;
1831 while (penv) {
1832 if (penv != cpu_single_env) {
1833 penv->kvm_cpu_state.stop = 1;
1834 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1835 } else {
1836 penv->kvm_cpu_state.stop = 0;
1837 penv->kvm_cpu_state.stopped = 1;
1838 cpu_exit(penv);
1840 penv = (CPUState *)penv->next_cpu;
1843 while (!all_threads_paused())
1844 qemu_cond_wait(&qemu_pause_cond);
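/*
 * Pause protocol: each foreign vcpu gets kvm_cpu_state.stop set and a
 * SIG_IPI; kvm_main_loop_wait() on that thread then flips stop to stopped
 * and signals qemu_pause_cond.  The calling thread's own CPU
 * (cpu_single_env) is marked stopped directly, and pause_all_threads()
 * spins on all_threads_paused().
 */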
1847 static void resume_all_threads(void)
1849 CPUState *penv = first_cpu;
1851 assert(!cpu_single_env);
1853 while (penv) {
1854 penv->kvm_cpu_state.stop = 0;
1855 penv->kvm_cpu_state.stopped = 0;
1856 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1857 penv = (CPUState *)penv->next_cpu;
1861 static void kvm_vm_state_change_handler(void *context, int running, int reason)
1863 if (running)
1864 resume_all_threads();
1865 else
1866 pause_all_threads();
1869 static void update_regs_for_sipi(CPUState *env)
1871 kvm_arch_update_regs_for_sipi(env);
1872 env->kvm_cpu_state.sipi_needed = 0;
1875 static void update_regs_for_init(CPUState *env)
1877 #ifdef TARGET_I386
1878 SegmentCache cs = env->segs[R_CS];
1879 #endif
1881 cpu_reset(env);
1883 #ifdef TARGET_I386
1884 /* restore SIPI vector */
1885 if(env->kvm_cpu_state.sipi_needed)
1886 env->segs[R_CS] = cs;
1887 #endif
1889 env->kvm_cpu_state.init = 0;
1890 kvm_arch_load_regs(env);
1893 static void setup_kernel_sigmask(CPUState *env)
1895 sigset_t set;
1897 sigemptyset(&set);
1898 sigaddset(&set, SIGUSR2);
1899 sigaddset(&set, SIGIO);
1900 sigaddset(&set, SIGALRM);
1901 sigprocmask(SIG_BLOCK, &set, NULL);
1903 sigprocmask(SIG_BLOCK, NULL, &set);
1904 sigdelset(&set, SIG_IPI);
1906 kvm_set_signal_mask(env->kvm_cpu_state.vcpu_ctx, &set);
1909 static void qemu_kvm_system_reset(void)
1911 CPUState *penv = first_cpu;
1913 pause_all_threads();
1915 qemu_system_reset();
1917 while (penv) {
1918 kvm_arch_cpu_reset(penv);
1919 penv = (CPUState *)penv->next_cpu;
1922 resume_all_threads();
1925 static int kvm_main_loop_cpu(CPUState *env)
1927 setup_kernel_sigmask(env);
1929 pthread_mutex_lock(&qemu_mutex);
1931 kvm_qemu_init_env(env);
1932 #ifdef TARGET_I386
1933 kvm_tpr_vcpu_start(env);
1934 #endif
1936 cpu_single_env = env;
1937 kvm_arch_load_regs(env);
1939 while (1) {
1940 while (!has_work(env))
1941 kvm_main_loop_wait(env, 1000);
1942 if (env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI))
1943 env->halted = 0;
1944 if (!kvm_irqchip_in_kernel(kvm_context)) {
1945 if (env->kvm_cpu_state.init)
1946 update_regs_for_init(env);
1947 if (env->kvm_cpu_state.sipi_needed)
1948 update_regs_for_sipi(env);
1950 if (!env->halted || kvm_irqchip_in_kernel(kvm_context))
1951 kvm_cpu_exec(env);
1952 env->exit_request = 0;
1953 env->exception_index = EXCP_INTERRUPT;
1954 kvm_main_loop_wait(env, 0);
1956 pthread_mutex_unlock(&qemu_mutex);
1957 return 0;
1960 static void *ap_main_loop(void *_env)
1962 CPUState *env = _env;
1963 sigset_t signals;
1964 struct ioperm_data *data = NULL;
1966 current_env = env;
1967 env->thread_id = kvm_get_thread_id();
1968 sigfillset(&signals);
1969 sigprocmask(SIG_BLOCK, &signals, NULL);
1970 env->kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(kvm_context, env->cpu_index);
1972 #ifdef USE_KVM_DEVICE_ASSIGNMENT
1973 /* do ioperm for io ports of assigned devices */
1974 LIST_FOREACH(data, &ioperm_head, entries)
1975 on_vcpu(env, kvm_arch_do_ioperm, data);
1976 #endif
1978 /* signal VCPU creation */
1979 pthread_mutex_lock(&qemu_mutex);
1980 current_env->kvm_cpu_state.created = 1;
1981 pthread_cond_signal(&qemu_vcpu_cond);
1983 /* and wait for machine initialization */
1984 while (!qemu_system_ready)
1985 qemu_cond_wait(&qemu_system_cond);
1986 pthread_mutex_unlock(&qemu_mutex);
1988 kvm_main_loop_cpu(env);
1989 return NULL;
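/*
 * Per-vcpu thread lifecycle: kvm_init_vcpu() spawns ap_main_loop(), which
 * blocks all signals, creates its vcpu context, flags created and signals
 * qemu_vcpu_cond, then waits for qemu_system_ready/qemu_system_cond
 * (broadcast from kvm_main_loop()) before entering kvm_main_loop_cpu().
 */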
1992 void kvm_init_vcpu(CPUState *env)
1994 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
1996 while (env->kvm_cpu_state.created == 0)
1997 qemu_cond_wait(&qemu_vcpu_cond);
2000 int kvm_vcpu_inited(CPUState *env)
2002 return env->kvm_cpu_state.created;
2005 int kvm_init_ap(void)
2007 #ifdef TARGET_I386
2008 kvm_tpr_opt_setup();
2009 #endif
2010 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
2012 signal(SIG_IPI, sig_ipi_handler);
2013 return 0;
2016 void qemu_kvm_notify_work(void)
2018 uint64_t value = 1;
2019 char buffer[8];
2020 size_t offset = 0;
2022 if (io_thread_fd == -1)
2023 return;
2025 memcpy(buffer, &value, sizeof(value));
2027 while (offset < 8) {
2028 ssize_t len;
2030 len = write(io_thread_fd, buffer + offset, 8 - offset);
2031 if (len == -1 && errno == EINTR)
2032 continue;
2034 if (len <= 0)
2035 break;
2037 offset += len;
2040 if (offset != 8)
2041 fprintf(stderr, "failed to notify io thread\n");
2044 /* If we have signalfd, we mask out the signals we want to handle and then
2045 * use signalfd to listen for them. We rely on whatever the current signal
2046 * handler is to dispatch the signals when we receive them.
2049 static void sigfd_handler(void *opaque)
2051 int fd = (unsigned long)opaque;
2052 struct qemu_signalfd_siginfo info;
2053 struct sigaction action;
2054 ssize_t len;
2056 while (1) {
2057 do {
2058 len = read(fd, &info, sizeof(info));
2059 } while (len == -1 && errno == EINTR);
2061 if (len == -1 && errno == EAGAIN)
2062 break;
2064 if (len != sizeof(info)) {
2065 printf("read from sigfd returned %zd: %m\n", len);
2066 return;
2069 sigaction(info.ssi_signo, NULL, &action);
2070 if (action.sa_handler)
2071 action.sa_handler(info.ssi_signo);
2076 /* Used to break IO thread out of select */
2077 static void io_thread_wakeup(void *opaque)
2079 int fd = (unsigned long)opaque;
2080 char buffer[8];
2081 size_t offset = 0;
2083 while (offset < 8) {
2084 ssize_t len;
2086 len = read(fd, buffer + offset, 8 - offset);
2087 if (len == -1 && errno == EINTR)
2088 continue;
2090 if (len <= 0)
2091 break;
2093 offset += len;
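/*
 * qemu_kvm_notify_work() and io_thread_wakeup() are the two ends of the
 * eventfd pair set up in kvm_main_loop(): the notifier writes an 8-byte
 * value to io_thread_fd (fds[1]) and the wakeup handler drains fds[0],
 * which is enough to break the io thread out of select().
 */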
2097 int kvm_main_loop(void)
2099 int fds[2];
2100 sigset_t mask;
2101 int sigfd;
2103 io_thread = pthread_self();
2104 qemu_system_ready = 1;
2106 if (qemu_eventfd(fds) == -1) {
2107 fprintf(stderr, "failed to create eventfd\n");
2108 return -errno;
2111 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
2112 (void *)(unsigned long)fds[0]);
2114 io_thread_fd = fds[1];
2116 sigemptyset(&mask);
2117 sigaddset(&mask, SIGIO);
2118 sigaddset(&mask, SIGALRM);
2119 sigprocmask(SIG_BLOCK, &mask, NULL);
2121 sigfd = qemu_signalfd(&mask);
2122 if (sigfd == -1) {
2123 fprintf(stderr, "failed to create signalfd\n");
2124 return -errno;
2127 fcntl(sigfd, F_SETFL, O_NONBLOCK);
2129 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
2130 (void *)(unsigned long)sigfd);
2132 pthread_cond_broadcast(&qemu_system_cond);
2134 io_thread_sigfd = sigfd;
2135 cpu_single_env = NULL;
2137 while (1) {
2138 main_loop_wait(1000);
2139 if (qemu_shutdown_requested()) {
2140 if (qemu_no_shutdown()) {
2141 vm_stop(0);
2142 } else
2143 break;
2144 } else if (qemu_powerdown_requested())
2145 qemu_system_powerdown();
2146 else if (qemu_reset_requested())
2147 qemu_kvm_system_reset();
2148 else if (kvm_debug_cpu_requested) {
2149 gdb_set_stop_cpu(kvm_debug_cpu_requested);
2150 vm_stop(EXCP_DEBUG);
2151 kvm_debug_cpu_requested = NULL;
2155 pause_all_threads();
2156 pthread_mutex_unlock(&qemu_mutex);
2158 return 0;
2161 #ifdef KVM_CAP_SET_GUEST_DEBUG
2162 static int kvm_debug(void *opaque, void *data,
2163 struct kvm_debug_exit_arch *arch_info)
2165 int handle = kvm_arch_debug(arch_info);
2166 CPUState *env = data;
2168 if (handle) {
2169 kvm_debug_cpu_requested = env;
2170 env->kvm_cpu_state.stopped = 1;
2172 return handle;
2174 #endif
2176 static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
2178 *data = cpu_inb(0, addr);
2179 return 0;
2182 static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
2184 *data = cpu_inw(0, addr);
2185 return 0;
2188 static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
2190 *data = cpu_inl(0, addr);
2191 return 0;
2194 #define PM_IO_BASE 0xb000
2196 static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
2198 if (addr == 0xb2) {
2199 switch (data) {
2200 case 0: {
2201 cpu_outb(0, 0xb3, 0);
2202 break;
2204 case 0xf0: {
2205 unsigned x;
2207 /* enable acpi */
2208 x = cpu_inw(0, PM_IO_BASE + 4);
2209 x &= ~1;
2210 cpu_outw(0, PM_IO_BASE + 4, x);
2211 break;
2213 case 0xf1: {
2214 unsigned x;
2216 /* enable acpi */
2217 x = cpu_inw(0, PM_IO_BASE + 4);
2218 x |= 1;
2219 cpu_outw(0, PM_IO_BASE + 4, x);
2220 break;
2222 default:
2223 break;
2225 return 0;
2227 cpu_outb(0, addr, data);
2228 return 0;
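/*
 * Writes to port 0xb2 (what appears to be the APM/SMI command port) are
 * intercepted here to emulate the firmware ACPI enable/disable handshake:
 * data 0xf0 clears and 0xf1 sets bit 0 of the PM control register at
 * PM_IO_BASE + 4 (presumably SCI_EN).  Other values fall through to
 * cpu_outb().
 */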
2231 static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
2233 cpu_outw(0, addr, data);
2234 return 0;
2237 static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
2239 cpu_outl(0, addr, data);
2240 return 0;
2243 static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
2245 cpu_physical_memory_rw(addr, data, len, 0);
2246 return 0;
2249 static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
2251 cpu_physical_memory_rw(addr, data, len, 1);
2252 return 0;
2255 static int kvm_io_window(void *opaque)
2257 return 1;
2261 static int kvm_halt(void *opaque, kvm_vcpu_context_t vcpu)
2263 return kvm_arch_halt(opaque, vcpu);
2266 static int kvm_shutdown(void *opaque, void *data)
2268 CPUState *env = (CPUState *)data;
2270 /* stop the current vcpu from going back to guest mode */
2271 env->kvm_cpu_state.stopped = 1;
2273 qemu_system_reset_request();
2274 return 1;
2277 static int handle_unhandled(kvm_context_t kvm, kvm_vcpu_context_t vcpu,
2278 uint64_t reason)
2280 fprintf(stderr, "kvm: unhandled exit %"PRIx64"\n", reason);
2281 return -EINVAL;
2284 static struct kvm_callbacks qemu_kvm_ops = {
2285 #ifdef KVM_CAP_SET_GUEST_DEBUG
2286 .debug = kvm_debug,
2287 #endif
2288 .inb = kvm_inb,
2289 .inw = kvm_inw,
2290 .inl = kvm_inl,
2291 .outb = kvm_outb,
2292 .outw = kvm_outw,
2293 .outl = kvm_outl,
2294 .mmio_read = kvm_mmio_read,
2295 .mmio_write = kvm_mmio_write,
2296 .halt = kvm_halt,
2297 .shutdown = kvm_shutdown,
2298 .io_window = kvm_io_window,
2299 .try_push_interrupts = kvm_try_push_interrupts,
2300 #ifdef KVM_CAP_USER_NMI
2301 .push_nmi = kvm_arch_push_nmi,
2302 #endif
2303 .post_kvm_run = kvm_post_run,
2304 .pre_kvm_run = kvm_pre_run,
2305 #ifdef TARGET_I386
2306 .tpr_access = handle_tpr_access,
2307 #endif
2308 #ifdef TARGET_PPC
2309 .powerpc_dcr_read = handle_powerpc_dcr_read,
2310 .powerpc_dcr_write = handle_powerpc_dcr_write,
2311 #endif
2312 .unhandled = handle_unhandled,
2315 int kvm_qemu_init()
2317 /* Try to initialize kvm */
2318 kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
2319 if (!kvm_context) {
2320 return -1;
2322 pthread_mutex_lock(&qemu_mutex);
2324 return 0;
2327 #ifdef TARGET_I386
2328 static int destroy_region_works = 0;
2329 #endif
2332 #if !defined(TARGET_I386)
2333 int kvm_arch_init_irq_routing(void)
2335 return 0;
2337 #endif
2339 int kvm_qemu_create_context(void)
2341 int r;
2343 if (!kvm_irqchip) {
2344 kvm_disable_irqchip_creation(kvm_context);
2346 if (!kvm_pit) {
2347 kvm_disable_pit_creation(kvm_context);
2349 if (kvm_create(kvm_context, 0, NULL) < 0) {
2350 kvm_qemu_destroy();
2351 return -1;
2353 r = kvm_arch_qemu_create_context();
354 if (r < 0)
2355 kvm_qemu_destroy();
2356 if (kvm_pit && !kvm_pit_reinject) {
2357 if (kvm_reinject_control(kvm_context, 0)) {
2358 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
2359 return -1;
2362 #ifdef TARGET_I386
2363 destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
2364 #endif
2366 r = kvm_arch_init_irq_routing();
2367 if (r < 0) {
2368 return r;
2371 return 0;
2374 void kvm_qemu_destroy(void)
2376 kvm_finalize(kvm_context);
2379 #ifdef TARGET_I386
2380 static int must_use_aliases_source(target_phys_addr_t addr)
2382 if (destroy_region_works)
2383 return false;
2384 if (addr == 0xa0000 || addr == 0xa8000)
2385 return true;
2386 return false;
2389 static int must_use_aliases_target(target_phys_addr_t addr)
2391 if (destroy_region_works)
2392 return false;
2393 if (addr >= 0xe0000000 && addr < 0x100000000ull)
2394 return true;
2395 return false;
2398 static struct mapping {
2399 target_phys_addr_t phys;
2400 ram_addr_t ram;
2401 ram_addr_t len;
2402 } mappings[50];
2403 static int nr_mappings;
2405 static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
2407 struct mapping *p;
2409 for (p = mappings; p < mappings + nr_mappings; ++p) {
2410 if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
2411 return p;
2414 return NULL;
2417 static struct mapping *find_mapping(target_phys_addr_t start_addr)
2419 struct mapping *p;
2421 for (p = mappings; p < mappings + nr_mappings; ++p) {
2422 if (p->phys <= start_addr && start_addr < p->phys + p->len) {
2423 return p;
2426 return NULL;
2429 static void drop_mapping(target_phys_addr_t start_addr)
2431 struct mapping *p = find_mapping(start_addr);
2433 if (p)
2434 *p = mappings[--nr_mappings];
2436 #endif
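/*
 * When KVM cannot reliably destroy memory slots (destroy_region_works == 0),
 * the VGA window at 0xa0000/0xa8000 and guest addresses at or above
 * 0xe0000000 are handled through memory aliases instead.  The mappings[]
 * table remembers the phys/ram associations so that
 * kvm_cpu_register_physical_memory() below can create or drop the aliases.
 */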
2438 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
2439 unsigned long size,
2440 unsigned long phys_offset)
2442 int r = 0;
2443 unsigned long area_flags;
2444 #ifdef TARGET_I386
2445 struct mapping *p;
2446 #endif
2448 if (start_addr + size > phys_ram_size) {
2449 phys_ram_size = start_addr + size;
2452 phys_offset &= ~IO_MEM_ROM;
2453 area_flags = phys_offset & ~TARGET_PAGE_MASK;
2455 if (area_flags != IO_MEM_RAM) {
2456 #ifdef TARGET_I386
2457 if (must_use_aliases_source(start_addr)) {
2458 kvm_destroy_memory_alias(kvm_context, start_addr);
2459 return;
2461 if (must_use_aliases_target(start_addr))
2462 return;
2463 #endif
2464 while (size > 0) {
2465 p = find_mapping(start_addr);
2466 if (p) {
2467 kvm_unregister_memory_area(kvm_context, p->phys, p->len);
2468 drop_mapping(p->phys);
2470 start_addr += TARGET_PAGE_SIZE;
2471 if (size > TARGET_PAGE_SIZE) {
2472 size -= TARGET_PAGE_SIZE;
2473 } else {
2474 size = 0;
2477 return;
2480 r = kvm_is_containing_region(kvm_context, start_addr, size);
2481 if (r)
2482 return;
2484 if (area_flags >= TLB_MMIO)
2485 return;
2487 #ifdef TARGET_I386
2488 if (must_use_aliases_source(start_addr)) {
2489 p = find_ram_mapping(phys_offset);
2490 if (p) {
2491 kvm_create_memory_alias(kvm_context, start_addr, size,
2492 p->phys + (phys_offset - p->ram));
2494 return;
2496 #endif
2498 r = kvm_register_phys_mem(kvm_context, start_addr,
2499 qemu_get_ram_ptr(phys_offset),
2500 size, 0);
2501 if (r < 0) {
2502 printf("kvm_cpu_register_physical_memory: failed\n");
2503 exit(1);
2506 #ifdef TARGET_I386
2507 drop_mapping(start_addr);
2508 p = &mappings[nr_mappings++];
2509 p->phys = start_addr;
2510 p->ram = phys_offset;
2511 p->len = size;
2512 #endif
2514 return;
2517 void kvm_cpu_unregister_physical_memory(target_phys_addr_t start_addr,
2518 target_phys_addr_t size,
2519 unsigned long phys_offset)
2521 kvm_unregister_memory_area(kvm_context, start_addr, size);
2524 int kvm_setup_guest_memory(void *area, unsigned long size)
2526 int ret = 0;
2528 #ifdef MADV_DONTFORK
2529 if (kvm_enabled() && !kvm_has_sync_mmu())
2530 ret = madvise(area, size, MADV_DONTFORK);
2531 #endif
2533 if (ret)
2534 perror ("madvise");
2536 return ret;
2539 int kvm_qemu_check_extension(int ext)
2541 return kvm_check_extension(kvm_context, ext);
2544 int kvm_qemu_init_env(CPUState *cenv)
2546 return kvm_arch_qemu_init_env(cenv);
2549 #ifdef KVM_CAP_SET_GUEST_DEBUG
2550 struct kvm_sw_breakpoint_head kvm_sw_breakpoints =
2551 TAILQ_HEAD_INITIALIZER(kvm_sw_breakpoints);
2553 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(target_ulong pc)
2555 struct kvm_sw_breakpoint *bp;
2557 TAILQ_FOREACH(bp, &kvm_sw_breakpoints, entry) {
2558 if (bp->pc == pc)
2559 return bp;
2561 return NULL;
2564 struct kvm_set_guest_debug_data {
2565 struct kvm_guest_debug dbg;
2566 int err;
2569 static void kvm_invoke_set_guest_debug(void *data)
2571 struct kvm_set_guest_debug_data *dbg_data = data;
2573 dbg_data->err = kvm_set_guest_debug(cpu_single_env->kvm_cpu_state.vcpu_ctx,
2574 &dbg_data->dbg);
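/*
 * The debug control block is applied from the vcpu's own thread: the data
 * is packaged in kvm_set_guest_debug_data and kvm_invoke_set_guest_debug()
 * is run on that vcpu through on_vcpu().
 */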
2577 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
2579 struct kvm_set_guest_debug_data data;
2581 data.dbg.control = 0;
2582 if (env->singlestep_enabled)
2583 data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
2585 kvm_arch_update_guest_debug(env, &data.dbg);
2586 data.dbg.control |= reinject_trap;
2588 on_vcpu(env, kvm_invoke_set_guest_debug, &data);
2589 return data.err;
2592 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
2593 target_ulong len, int type)
2595 struct kvm_sw_breakpoint *bp;
2596 CPUState *env;
2597 int err;
2599 if (type == GDB_BREAKPOINT_SW) {
2600 bp = kvm_find_sw_breakpoint(addr);
2601 if (bp) {
2602 bp->use_count++;
2603 return 0;
2606 bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
2607 if (!bp)
2608 return -ENOMEM;
2610 bp->pc = addr;
2611 bp->use_count = 1;
2612 err = kvm_arch_insert_sw_breakpoint(current_env, bp);
2613 if (err) {
2614 qemu_free(bp);
2615 return err;
2618 TAILQ_INSERT_HEAD(&kvm_sw_breakpoints, bp, entry);
2619 } else {
2620 err = kvm_arch_insert_hw_breakpoint(addr, len, type);
2621 if (err)
2622 return err;
2625 for (env = first_cpu; env != NULL; env = env->next_cpu) {
2626 err = kvm_update_guest_debug(env, 0);
2627 if (err)
2628 return err;
2630 return 0;
2633 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
2634 target_ulong len, int type)
2636 struct kvm_sw_breakpoint *bp;
2637 CPUState *env;
2638 int err;
2640 if (type == GDB_BREAKPOINT_SW) {
2641 bp = kvm_find_sw_breakpoint(addr);
2642 if (!bp)
2643 return -ENOENT;
2645 if (bp->use_count > 1) {
2646 bp->use_count--;
2647 return 0;
2650 err = kvm_arch_remove_sw_breakpoint(current_env, bp);
2651 if (err)
2652 return err;
2654 TAILQ_REMOVE(&kvm_sw_breakpoints, bp, entry);
2655 qemu_free(bp);
2656 } else {
2657 err = kvm_arch_remove_hw_breakpoint(addr, len, type);
2658 if (err)
2659 return err;
2662 for (env = first_cpu; env != NULL; env = env->next_cpu) {
2663 err = kvm_update_guest_debug(env, 0);
2664 if (err)
2665 return err;
2667 return 0;
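/*
 * Drop every software and hardware breakpoint.  Software breakpoints are
 * removed through the requesting CPU first; if that fails, the other CPUs
 * are tried, since one of them may still see the breakpoint.
 */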
2670 void kvm_remove_all_breakpoints(CPUState *current_env)
2672 struct kvm_sw_breakpoint *bp, *next;
2673 CPUState *env;
2675 TAILQ_FOREACH_SAFE(bp, &kvm_sw_breakpoints, entry, next) {
2676 if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
2677 /* Try harder to find a CPU that currently sees the breakpoint. */
2678 for (env = first_cpu; env != NULL; env = env->next_cpu) {
2679 if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
2680 break;
2684 kvm_arch_remove_all_hw_breakpoints();
2686 for (env = first_cpu; env != NULL; env = env->next_cpu)
2687 kvm_update_guest_debug(env, 0);
2690 #else /* !KVM_CAP_SET_GUEST_DEBUG */
2692 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
2694 return -EINVAL;
2697 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
2698 target_ulong len, int type)
2700 return -EINVAL;
2703 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
2704 target_ulong len, int type)
2706 return -EINVAL;
2709 void kvm_remove_all_breakpoints(CPUState *current_env)
2712 #endif /* !KVM_CAP_SET_GUEST_DEBUG */
2715 * dirty pages logging
2717 /* FIXME: use unsigned long pointer instead of unsigned char */
2718 unsigned char *kvm_dirty_bitmap = NULL;
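/*
 * Enable or disable dirty-page logging for the whole guest.  Enabling
 * allocates the global kvm_dirty_bitmap and turns on logging for all
 * slots; disabling resets the log state and releases the bitmap.
 */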
2719 int kvm_physical_memory_set_dirty_tracking(int enable)
2721 int r = 0;
2723 if (!kvm_enabled())
2724 return 0;
2726 if (enable) {
2727 if (!kvm_dirty_bitmap) {
2728 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
2729 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
2730 if (kvm_dirty_bitmap == NULL) {
2731 perror("Failed to allocate dirty pages bitmap");
2732 r = -1;
2734 else {
2735 r = kvm_dirty_pages_log_enable_all(kvm_context);
2739 else {
2740 if (kvm_dirty_bitmap) {
2741 r = kvm_dirty_pages_log_reset(kvm_context);
2742 qemu_free(kvm_dirty_bitmap);
2743 kvm_dirty_bitmap = NULL;
2746 return r;
2749 /* get kvm's dirty pages bitmap and update qemu's */
2750 static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
2751 unsigned char *bitmap,
2752 unsigned long offset,
2753 unsigned long mem_size)
2755 unsigned int i, j, n = 0;
2756 unsigned char c;
2757 unsigned long page_number, addr, addr1;
2758 ram_addr_t ram_addr;
2759 unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + 7) / 8;
2762 * Walking the dirty bitmap is faster than walking memory page by page,
2763 * especially when most of the memory is not dirty.
2765 for (i = 0; i < len; i++) {
2766 c = bitmap[i];
2767 while (c > 0) {
2768 j = ffsl(c) - 1;
2769 c &= ~(1u << j);
2770 page_number = i * 8 + j;
2771 addr1 = page_number * TARGET_PAGE_SIZE;
2772 addr = offset + addr1;
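/* e.g. bit j = 2 set in bitmap byte i = 3 gives page_number 26,
 * so addr = offset + 26 * TARGET_PAGE_SIZE */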
2773 ram_addr = cpu_get_physical_page_desc(addr);
2774 cpu_physical_memory_set_dirty(ram_addr);
2775 n++;
2778 return 0;
2780 static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
2781 void *bitmap, void *opaque)
2783 return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
2787 * get kvm's dirty pages bitmap and update qemu's copy; the whole
2788 * guest physical address range is scanned
2790 int kvm_update_dirty_pages_log(void)
2792 int r = 0;
2795 r = kvm_get_dirty_pages_range(kvm_context, 0, -1UL,
2796 kvm_dirty_bitmap, NULL,
2797 kvm_get_dirty_bitmap_cb);
2798 return r;
2801 void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
2802 int log)
2804 if (log)
2805 kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
2806 else {
2807 #ifdef TARGET_I386
2808 if (must_use_aliases_target(start))
2809 return;
2810 #endif
2811 kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
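/*
 * Build a bitmap with one bit per guest page: all pages backed by RAM
 * (including the pages between ram_size and phys_ram_size) are marked,
 * the legacy VGA hole at 0xa0000-0xc0000 is cleared, and the remaining
 * tail of the bitmap is zeroed.
 */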
2815 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
2817 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
2818 unsigned int brsize = BITMAP_SIZE(ram_size);
2819 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
2820 unsigned int extra_bytes = (extra_pages + 7) / 8;
2821 unsigned int hole_start = BITMAP_SIZE(0xa0000);
2822 unsigned int hole_end = BITMAP_SIZE(0xc0000);
2824 memset(bitmap, 0xFF, brsize + extra_bytes);
2825 memset(bitmap + hole_start, 0, hole_end - hole_start);
2826 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
2828 return 0;
2831 #ifdef KVM_CAP_IRQCHIP
2833 int kvm_set_irq(int irq, int level, int *status)
2835 return kvm_set_irq_level(kvm_context, irq, level, status);
2838 #endif
2840 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
2842 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
2845 void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
2846 unsigned long size, int log, int writable)
2848 return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
2851 void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
2852 unsigned long size)
2854 kvm_destroy_phys_mem(kvm_context, start_addr, size);
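/*
 * Helpers for releasing/taking the global qemu_mutex around vcpu
 * execution.  The mutex may only be dropped while no vcpu is marked as
 * currently executing (cpu_single_env == NULL), and reacquiring it
 * clears cpu_single_env again.
 */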
2857 void kvm_mutex_unlock(void)
2859 assert(!cpu_single_env);
2860 pthread_mutex_unlock(&qemu_mutex);
2863 void kvm_mutex_lock(void)
2865 pthread_mutex_lock(&qemu_mutex);
2866 cpu_single_env = NULL;
2869 int qemu_kvm_register_coalesced_mmio(target_phys_addr_t addr, unsigned int size)
2871 return kvm_register_coalesced_mmio(kvm_context, addr, size);
2874 int qemu_kvm_unregister_coalesced_mmio(target_phys_addr_t addr,
2875 unsigned int size)
2877 return kvm_unregister_coalesced_mmio(kvm_context, addr, size);
2880 int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
2882 return kvm_register_coalesced_mmio(kvm_context, start, size);
2885 int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
2887 return kvm_unregister_coalesced_mmio(kvm_context, start, size);
2890 #ifdef USE_KVM_DEVICE_ASSIGNMENT
2891 void kvm_add_ioperm_data(struct ioperm_data *data)
2893 LIST_INSERT_HEAD(&ioperm_head, data, entries);
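/* Remove and free any registered ioperm range matching start_port/num. */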
2896 void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
2898 struct ioperm_data *data;
2900 data = LIST_FIRST(&ioperm_head);
2901 while (data) {
2902 struct ioperm_data *next = LIST_NEXT(data, entries);
2904 if (data->start_port == start_port && data->num == num) {
2905 LIST_REMOVE(data, entries);
2906 qemu_free(data);
2909 data = next;
2913 void kvm_ioperm(CPUState *env, void *data)
2915 if (kvm_enabled() && qemu_system_ready)
2916 on_vcpu(env, kvm_arch_do_ioperm, data);
2919 #endif
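/*
 * Fold the kernel's dirty log for [start_addr, end_addr) into qemu's
 * dirty bitmap through kvm_get_dirty_bitmap_cb().  The scratch buffer is
 * sized at one bit per byte of the range plus slack, which comfortably
 * exceeds the one bit per page actually needed.  No-op on IA64.
 */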
2921 int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
2923 #ifndef TARGET_IA64
2924 void *buf;
2926 #ifdef TARGET_I386
2927 if (must_use_aliases_source(start_addr))
2928 return 0;
2929 #endif
2931 buf = qemu_malloc((end_addr - start_addr) / 8 + 2);
2932 kvm_get_dirty_pages_range(kvm_context, start_addr, end_addr - start_addr,
2933 buf, NULL, kvm_get_dirty_bitmap_cb);
2934 qemu_free(buf);
2935 #endif
2936 return 0;
2939 int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
2941 #ifdef TARGET_I386
2942 if (must_use_aliases_source(phys_addr))
2943 return 0;
2944 #endif
2946 #ifndef TARGET_IA64
2947 kvm_qemu_log_memory(phys_addr, len, 1);
2948 #endif
2949 return 0;
2952 int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
2954 #ifdef TARGET_I386
2955 if (must_use_aliases_source(phys_addr))
2956 return 0;
2957 #endif
2959 #ifndef TARGET_IA64
2960 kvm_qemu_log_memory(phys_addr, len, 0);
2961 #endif
2962 return 0;
2965 void qemu_kvm_cpu_stop(CPUState *env)
2967 if (kvm_enabled())
2968 env->kvm_cpu_state.stopped = 1;