/*
 * Kernel-based Virtual Machine control library
 *
 * This library provides an API to control the kvm hardware virtualization
 * module.
 *
 * Copyright (C) 2006 Qumranet
 *
 * Authors:
 *
 *  Avi Kivity   <avi@qumranet.com>
 *  Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the GNU LGPL license, version 2.
 */
#ifndef __user
#define __user /* temporary, until installed via make headers_install */
#endif

#include <linux/kvm.h>

#define EXPECTED_KVM_API_VERSION 12

#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
#error libkvm: userspace and kernel version mismatch
#endif
#include "sysemu.h"
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <inttypes.h>
#include "libkvm-all.h"

#include "libkvm.h"
//#define DEBUG_MEMREG
#ifdef DEBUG_MEMREG
#define DPRINTF(fmt, args...) \
    do { fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ##args); } while (0)
#else
#define DPRINTF(fmt, args...) do {} while (0)
#endif

#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))

int kvm_abi = EXPECTED_KVM_API_VERSION;
int kvm_page_size;
static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
{
    uint32_t *bitmap = kvm->used_gsi_bitmap;

    if (gsi < kvm->max_gsi)
        bitmap[gsi / 32] |= 1U << (gsi % 32);
    else
        DPRINTF("Invalid GSI %u\n", gsi);
}
static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
{
    uint32_t *bitmap = kvm->used_gsi_bitmap;

    if (gsi < kvm->max_gsi)
        bitmap[gsi / 32] &= ~(1U << (gsi % 32));
    else
        DPRINTF("Invalid GSI %u\n", gsi);
}
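/*
 * Worked example (illustrative): GSI 37 lands in bitmap word 37 / 32 = 1,
 * bit 37 % 32 = 5, so set_gsi(kvm, 37) performs bitmap[1] |= 1U << 5 and
 * clear_gsi(kvm, 37) performs bitmap[1] &= ~(1U << 5).
 */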
struct slot_info {
    unsigned long phys_addr;
    unsigned long len;
    unsigned long userspace_addr;
    unsigned flags;
    int logging_count;
};

struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
static void init_slots(void)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        slots[i].len = 0;
}
static int get_free_slot(kvm_context_t kvm)
{
    int i;
    int tss_ext;

#if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
    tss_ext = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
#else
    tss_ext = 0;
#endif

    /*
     * On older kernels where the set-tss ioctl is not supported we must
     * reserve slot 0 to hold the extended memory, as the vmx will use the
     * last 3 pages of this slot.
     */
    if (tss_ext > 0)
        i = 0;
    else
        i = 1;

    for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        if (!slots[i].len)
            return i;
    return -1;
}
static void register_slot(int slot, unsigned long phys_addr, unsigned long len,
                          unsigned long userspace_addr, unsigned flags)
{
    slots[slot].phys_addr = phys_addr;
    slots[slot].len = len;
    slots[slot].userspace_addr = userspace_addr;
    slots[slot].flags = flags;
}

static void free_slot(int slot)
{
    slots[slot].len = 0;
    slots[slot].logging_count = 0;
}
static int get_slot(unsigned long phys_addr)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
        if (slots[i].len && slots[i].phys_addr <= phys_addr &&
            (slots[i].phys_addr + slots[i].len - 1) >= phys_addr)
            return i;
    }
    return -1;
}
/* Returns the number of the slot that fully contains the range
 * [phys_addr, phys_addr + size), or -1 if no slot contains it. */
static int get_container_slot(uint64_t phys_addr, unsigned long size)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        if (slots[i].len && slots[i].phys_addr <= phys_addr &&
            (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
            return i;
    return -1;
}
int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size)
{
    int slot = get_container_slot(phys_addr, size);
    if (slot == -1)
        return 0;
    return 1;
}
/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(kvm_context_t kvm,
                                      unsigned long phys_addr,
                                      unsigned flags,
                                      unsigned mask)
{
    int r = -1;
    int slot = get_slot(phys_addr);

    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
        return 1;
    }

    flags = (slots[slot].flags & ~mask) | flags;
    if (flags == slots[slot].flags)
        return 0;
    slots[slot].flags = flags;

    {
        struct kvm_userspace_memory_region mem = {
            .slot = slot,
            .memory_size = slots[slot].len,
            .guest_phys_addr = slots[slot].phys_addr,
            .userspace_addr = slots[slot].userspace_addr,
            .flags = slots[slot].flags,
        };

        DPRINTF("slot %d start %llx len %llx flags %x\n",
                mem.slot,
                mem.guest_phys_addr,
                mem.memory_size,
                mem.flags);
        r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
        if (r == -1)
            fprintf(stderr, "%s: %m\n", __FUNCTION__);
    }
    return r;
}
static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
                                          int (*change)(kvm_context_t kvm,
                                                        uint64_t start,
                                                        uint64_t len))
{
    int i, r;

    for (i = r = 0; i < KVM_MAX_NUM_MEM_REGIONS && r == 0; i++) {
        if (slots[i].len)
            r = change(kvm, slots[i].phys_addr, slots[i].len);
    }
    return r;
}
int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm,
                                    uint64_t phys_addr,
                                    uint64_t len)
{
    int slot = get_slot(phys_addr);

    DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len);
    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return -EINVAL;
    }

    if (slots[slot].logging_count++)
        return 0;

    return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}
int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm,
                                     uint64_t phys_addr,
                                     uint64_t len)
{
    int slot = get_slot(phys_addr);

    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return -EINVAL;
    }

    if (--slots[slot].logging_count)
        return 0;

    return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}
/**
 * Enable dirty page logging for all memory regions
 */
int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
{
    if (kvm->dirty_pages_log_all)
        return 0;
    kvm->dirty_pages_log_all = 1;
    return kvm_dirty_pages_log_change_all(kvm,
                                          kvm_dirty_pages_log_enable_slot);
}
/**
 * Enable dirty page logging only for memory regions that were created with
 * dirty logging enabled (disable for all other memory regions).
 */
int kvm_dirty_pages_log_reset(kvm_context_t kvm)
{
    if (!kvm->dirty_pages_log_all)
        return 0;
    kvm->dirty_pages_log_all = 0;
    return kvm_dirty_pages_log_change_all(kvm,
                                          kvm_dirty_pages_log_disable_slot);
}
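/*
 * Illustrative usage sketch (not compiled): toggling dirty logging around a
 * scan pass for a region previously registered at guest physical address
 * "start" with length "len". The function name is hypothetical.
 */
#if 0
static void example_dirty_log_pass(kvm_context_t kvm,
                                   uint64_t start, uint64_t len)
{
    kvm_dirty_pages_log_enable_slot(kvm, start, len);
    /* ... let the guest run, then harvest the bitmap with
     * kvm_get_dirty_pages() below ... */
    kvm_dirty_pages_log_disable_slot(kvm, start, len);
}
#endif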
kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
                       void *opaque)
{
    int fd;
    kvm_context_t kvm;
    int r, gsi_count;

    fd = open("/dev/kvm", O_RDWR);
    if (fd == -1) {
        perror("open /dev/kvm");
        return NULL;
    }
    r = ioctl(fd, KVM_GET_API_VERSION, 0);
    if (r == -1) {
        fprintf(stderr, "kvm kernel version too old: "
                "KVM_GET_API_VERSION ioctl not supported\n");
        goto out_close;
    }
    if (r < EXPECTED_KVM_API_VERSION) {
        fprintf(stderr, "kvm kernel version too old: "
                "We expect API version %d or newer, but got "
                "version %d\n",
                EXPECTED_KVM_API_VERSION, r);
        goto out_close;
    }
    if (r > EXPECTED_KVM_API_VERSION) {
        fprintf(stderr, "kvm userspace version too old\n");
        goto out_close;
    }
    kvm_abi = r;
    kvm_page_size = getpagesize();
    kvm = malloc(sizeof(*kvm));
    if (kvm == NULL)
        goto out_close;
    memset(kvm, 0, sizeof(*kvm));
    kvm->fd = fd;
    kvm->vm_fd = -1;
    kvm->callbacks = callbacks;
    kvm->opaque = opaque;
    kvm->dirty_pages_log_all = 0;
    kvm->no_irqchip_creation = 0;
    kvm->no_pit_creation = 0;

    gsi_count = kvm_get_gsi_count(kvm);
    if (gsi_count > 0) {
        int gsi_bits, i;

        /* Round up so we can search ints using ffs */
        gsi_bits = ALIGN(gsi_count, 32);
        kvm->used_gsi_bitmap = malloc(gsi_bits / 8);
        if (!kvm->used_gsi_bitmap)
            goto out_close;
        memset(kvm->used_gsi_bitmap, 0, gsi_bits / 8);
        kvm->max_gsi = gsi_bits;

        /* Mark any over-allocated bits as already in use */
        for (i = gsi_count; i < gsi_bits; i++)
            set_gsi(kvm, i);
    }

    return kvm;
out_close:
    close(fd);
    return NULL;
}
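/*
 * Illustrative usage sketch (not compiled): minimal context bring-up on top
 * of kvm_init()/kvm_create(). The callback table, opaque pointer and
 * function name are hypothetical client state.
 */
#if 0
static kvm_context_t example_open_kvm(struct kvm_callbacks *my_callbacks,
                                      void *my_opaque, unsigned long ram_size)
{
    void *vm_mem = NULL;
    kvm_context_t kvm = kvm_init(my_callbacks, my_opaque);

    if (!kvm)
        return NULL;
    if (kvm_create(kvm, ram_size, &vm_mem) < 0) {
        kvm_finalize(kvm);
        return NULL;
    }
    return kvm;
}
#endif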
void kvm_finalize(kvm_context_t kvm)
{
    /* FIXME
    if (kvm->vcpu_fd[0] != -1)
        close(kvm->vcpu_fd[0]);
    if (kvm->vm_fd != -1)
        close(kvm->vm_fd);
    */
    close(kvm->fd);
    free(kvm);
}
void kvm_disable_irqchip_creation(kvm_context_t kvm)
{
    kvm->no_irqchip_creation = 1;
}

void kvm_disable_pit_creation(kvm_context_t kvm)
{
    kvm->no_pit_creation = 1;
}
kvm_vcpu_context_t kvm_create_vcpu(kvm_context_t kvm, int id)
{
    long mmap_size;
    int r;
    kvm_vcpu_context_t vcpu_ctx = malloc(sizeof(struct kvm_vcpu_context));

    if (!vcpu_ctx) {
        errno = ENOMEM;
        return NULL;
    }

    vcpu_ctx->kvm = kvm;
    vcpu_ctx->id = id;

    r = ioctl(kvm->vm_fd, KVM_CREATE_VCPU, id);
    if (r == -1) {
        fprintf(stderr, "kvm_create_vcpu: %m\n");
        goto err;
    }
    vcpu_ctx->fd = r;
    mmap_size = ioctl(kvm->fd, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size == -1) {
        fprintf(stderr, "get vcpu mmap size: %m\n");
        goto err_fd;
    }
    vcpu_ctx->run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED,
                         vcpu_ctx->fd, 0);
    if (vcpu_ctx->run == MAP_FAILED) {
        fprintf(stderr, "mmap vcpu area: %m\n");
        goto err_fd;
    }
    return vcpu_ctx;
err_fd:
    close(vcpu_ctx->fd);
err:
    free(vcpu_ctx);
    return NULL;
}
int kvm_create_vm(kvm_context_t kvm)
{
    int fd = kvm->fd;

#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes = malloc(sizeof(*kvm->irq_routes));
    if (!kvm->irq_routes)
        return -ENOMEM;
    memset(kvm->irq_routes, 0, sizeof(*kvm->irq_routes));
    kvm->nr_allocated_irq_routes = 0;
#endif

    fd = ioctl(fd, KVM_CREATE_VM, 0);
    if (fd == -1) {
        fprintf(stderr, "kvm_create_vm: %m\n");
        return -1;
    }
    kvm->vm_fd = fd;
    return 0;
}
static int kvm_create_default_phys_mem(kvm_context_t kvm,
                                       unsigned long phys_mem_bytes,
                                       void **vm_mem)
{
#ifdef KVM_CAP_USER_MEMORY
    int r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
    if (r > 0)
        return 0;
    fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
#else
#error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
#endif
    return -1;
}
int kvm_check_extension(kvm_context_t kvm, int ext)
{
    int ret;

    ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, ext);
    if (ret > 0)
        return ret;
    return 0;
}
void kvm_create_irqchip(kvm_context_t kvm)
{
    int r;

    kvm->irqchip_in_kernel = 0;
#ifdef KVM_CAP_IRQCHIP
    if (!kvm->no_irqchip_creation) {
        r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
        if (r > 0) {    /* kernel irqchip supported */
            r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
            if (r >= 0) {
                kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
#if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
                r = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
                          KVM_CAP_IRQ_INJECT_STATUS);
                if (r > 0)
                    kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
#endif
                kvm->irqchip_in_kernel = 1;
            }
            else
                fprintf(stderr, "Create kernel PIC irqchip failed\n");
        }
    }
#endif
}
int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
{
    int r;

    r = kvm_create_vm(kvm);
    if (r < 0)
        return r;
    r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
    if (r < 0)
        return r;
    init_slots();
    r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
    if (r < 0)
        return r;
    kvm_create_irqchip(kvm);

    return 0;
}
void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len, int log, int writable)
{
    int r;
    int prot = PROT_READ;
    void *ptr;
    struct kvm_userspace_memory_region memory = {
        .memory_size = len,
        .guest_phys_addr = phys_start,
        .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
    };

    if (writable)
        prot |= PROT_WRITE;

#if !defined(__s390__)
    ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
#else
    ptr = mmap(LIBKVM_S390_ORIGIN, len, prot | PROT_EXEC,
               MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
#endif
    if (ptr == MAP_FAILED) {
        fprintf(stderr, "%s: %s\n", __func__, strerror(errno));
        return 0;
    }

    memset(ptr, 0, len);

    memory.userspace_addr = (unsigned long)ptr;
    memory.slot = get_free_slot(kvm);
    DPRINTF("slot %d start %llx len %llx flags %x\n",
            memory.slot,
            memory.guest_phys_addr,
            memory.memory_size,
            memory.flags);
    r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r == -1) {
        fprintf(stderr, "%s: %s\n", __func__, strerror(errno));
        return 0;
    }
    register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
                  memory.userspace_addr, memory.flags);

    return ptr;
}
int kvm_register_phys_mem(kvm_context_t kvm,
                          unsigned long phys_start, void *userspace_addr,
                          unsigned long len, int log)
{
    struct kvm_userspace_memory_region memory = {
        .memory_size = len,
        .guest_phys_addr = phys_start,
        .userspace_addr = (unsigned long)(intptr_t)userspace_addr,
        .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
    };
    int r;

    memory.slot = get_free_slot(kvm);
    DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %x\n",
            memory.guest_phys_addr, memory.memory_size,
            memory.userspace_addr, memory.slot, memory.flags);
    r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r == -1) {
        fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(errno));
        return -1;
    }
    register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
                  memory.userspace_addr, memory.flags);
    return 0;
}
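/*
 * Illustrative usage sketch (not compiled): backing 128MB of guest RAM at
 * guest physical address 0 with an anonymous mapping and registering it
 * without dirty logging. Names are hypothetical.
 */
#if 0
static int example_setup_ram(kvm_context_t kvm)
{
    unsigned long len = 128UL << 20;    /* 128MB */
    void *ram = mmap(NULL, len, PROT_READ | PROT_WRITE,
                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

    if (ram == MAP_FAILED)
        return -errno;
    return kvm_register_phys_mem(kvm, 0, ram, len, 0);
}
#endif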
/* Destroy/free a whole slot.
 * phys_start and len are the params passed to kvm_create_phys_mem()
 */
void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len)
{
    int slot;
    int r;
    struct kvm_userspace_memory_region memory = {
        .memory_size = 0,
        .guest_phys_addr = phys_start,
        .userspace_addr = 0,
        .flags = 0,
    };

    slot = get_slot(phys_start);

    if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
        fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
                __FUNCTION__, slot);
        return;
    }
    if (phys_start != slots[slot].phys_addr) {
        fprintf(stderr,
                "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
                __FUNCTION__, phys_start, slots[slot].phys_addr);
        phys_start = slots[slot].phys_addr;
    }

    memory.slot = slot;
    DPRINTF("slot %d start %llx len %llx flags %x\n",
            memory.slot,
            memory.guest_phys_addr,
            memory.memory_size,
            memory.flags);
    r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r == -1) {
        fprintf(stderr, "destroy_userspace_phys_mem: %s\n",
                strerror(errno));
        return;
    }

    free_slot(memory.slot);
}
void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size)
{
    int slot = get_container_slot(phys_addr, size);

    if (slot != -1) {
        DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size);
        kvm_destroy_phys_mem(kvm, phys_addr, size);
        return;
    }
}
static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
{
    int r;
    struct kvm_dirty_log log = {
        .slot = slot,
    };

    log.dirty_bitmap = buf;

    r = ioctl(kvm->vm_fd, ioctl_num, &log);
    if (r == -1)
        return -errno;
    return 0;
}
int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
{
    int slot;

    slot = get_slot(phys_addr);
    return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
}
int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
                              unsigned long len, void *buf, void *opaque,
                              int (*cb)(unsigned long start, unsigned long len,
                                        void *bitmap, void *opaque))
{
    int i;
    int r;
    unsigned long end_addr = phys_addr + len;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
        if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr)
            && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) {
            r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
            if (r)
                return r;
            r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
            if (r)
                return r;
        }
    }
    return 0;
}
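/*
 * Illustrative sketch (not compiled): a kvm_get_dirty_pages_range() callback
 * that counts dirty pages. KVM_GET_DIRTY_LOG hands back one bit per guest
 * page; "scratch_buf" is a hypothetical buffer big enough for the largest
 * slot's bitmap.
 */
#if 0
static int example_count_dirty_cb(unsigned long start, unsigned long len,
                                  void *bitmap, void *opaque)
{
    unsigned long i, *count = opaque;

    for (i = 0; i < len / kvm_page_size; i++)
        if (((uint8_t *)bitmap)[i / 8] & (1 << (i % 8)))
            ++*count;
    return 0;
}

/* Caller side:
 *     unsigned long count = 0;
 *     kvm_get_dirty_pages_range(kvm, 0, ram_size, scratch_buf, &count,
 *                               example_count_dirty_cb);
 */
#endif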
#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
{
    struct kvm_irq_level event;
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    event.level = level;
    event.irq = irq;
    r = ioctl(kvm->vm_fd, kvm->irqchip_inject_ioctl, &event);
    if (r == -1)
        perror("kvm_set_irq_level");

    if (status) {
#ifdef KVM_CAP_IRQ_INJECT_STATUS
        *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ?
            1 : event.status;
#else
        *status = 1;
#endif
    }

    return 1;
}
int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    r = ioctl(kvm->vm_fd, KVM_GET_IRQCHIP, chip);
    if (r == -1) {
        r = -errno;
        perror("kvm_get_irqchip");
    }
    return r;
}

int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    r = ioctl(kvm->vm_fd, KVM_SET_IRQCHIP, chip);
    if (r == -1) {
        r = -errno;
        perror("kvm_set_irqchip");
    }
    return r;
}

#endif
static int handle_io(kvm_vcpu_context_t vcpu)
{
    struct kvm_run *run = vcpu->run;
    kvm_context_t kvm = vcpu->kvm;
    uint16_t addr = run->io.port;
    int r;
    int i;
    void *p = (void *)run + run->io.data_offset;

    for (i = 0; i < run->io.count; ++i) {
        switch (run->io.direction) {
        case KVM_EXIT_IO_IN:
            switch (run->io.size) {
            case 1:
                r = kvm->callbacks->inb(kvm->opaque, addr, p);
                break;
            case 2:
                r = kvm->callbacks->inw(kvm->opaque, addr, p);
                break;
            case 4:
                r = kvm->callbacks->inl(kvm->opaque, addr, p);
                break;
            default:
                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                return -EMSGSIZE;
            }
            break;
        case KVM_EXIT_IO_OUT:
            switch (run->io.size) {
            case 1:
                r = kvm->callbacks->outb(kvm->opaque, addr,
                                         *(uint8_t *)p);
                break;
            case 2:
                r = kvm->callbacks->outw(kvm->opaque, addr,
                                         *(uint16_t *)p);
                break;
            case 4:
                r = kvm->callbacks->outl(kvm->opaque, addr,
                                         *(uint32_t *)p);
                break;
            default:
                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                return -EMSGSIZE;
            }
            break;
        default:
            fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
            return -EPROTO;
        }

        p += run->io.size;
    }

    return 0;
}
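/*
 * Note: for string I/O the kernel batches run->io.count items of
 * run->io.size bytes back to back starting at run + run->io.data_offset;
 * e.g. a "rep outsw" with count == 4 and size == 2 leaves 8 bytes there,
 * which the loop above consumes, advancing p by 2 per iteration.
 */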
int handle_debug(kvm_vcpu_context_t vcpu, void *env)
{
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_run *run = vcpu->run;
    kvm_context_t kvm = vcpu->kvm;

    return kvm->callbacks->debug(kvm->opaque, env, &run->debug.arch);
#else
    return 0;
#endif
}
int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
{
    return ioctl(vcpu->fd, KVM_GET_REGS, regs);
}

int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
{
    return ioctl(vcpu->fd, KVM_SET_REGS, regs);
}

int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
{
    return ioctl(vcpu->fd, KVM_GET_FPU, fpu);
}

int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
{
    return ioctl(vcpu->fd, KVM_SET_FPU, fpu);
}

int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
{
    return ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
}

int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
{
    return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
}
#ifdef KVM_CAP_MP_STATE
int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
{
    int r;

    r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
    if (r > 0)
        return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state);
    return -ENOSYS;
}

int kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
{
    int r;

    r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
    if (r > 0)
        return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
    return -ENOSYS;
}
#endif
static int handle_mmio(kvm_vcpu_context_t vcpu)
{
    unsigned long addr = vcpu->run->mmio.phys_addr;
    kvm_context_t kvm = vcpu->kvm;
    struct kvm_run *kvm_run = vcpu->run;
    void *data = kvm_run->mmio.data;

    /* hack: Red Hat 7.1 generates these weird accesses. */
    if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
        return 0;

    if (kvm_run->mmio.is_write)
        return kvm->callbacks->mmio_write(kvm->opaque, addr, data,
                                          kvm_run->mmio.len);
    else
        return kvm->callbacks->mmio_read(kvm->opaque, addr, data,
                                         kvm_run->mmio.len);
}
int handle_io_window(kvm_context_t kvm)
{
    return kvm->callbacks->io_window(kvm->opaque);
}

int handle_halt(kvm_vcpu_context_t vcpu)
{
    return vcpu->kvm->callbacks->halt(vcpu->kvm->opaque, vcpu);
}

int handle_shutdown(kvm_context_t kvm, void *env)
{
    return kvm->callbacks->shutdown(kvm->opaque, env);
}

int try_push_interrupts(kvm_context_t kvm)
{
    return kvm->callbacks->try_push_interrupts(kvm->opaque);
}

static inline void push_nmi(kvm_context_t kvm)
{
#ifdef KVM_CAP_USER_NMI
    kvm->callbacks->push_nmi(kvm->opaque);
#endif /* KVM_CAP_USER_NMI */
}

void post_kvm_run(kvm_context_t kvm, void *env)
{
    kvm->callbacks->post_kvm_run(kvm->opaque, env);
}

int pre_kvm_run(kvm_context_t kvm, void *env)
{
    return kvm->callbacks->pre_kvm_run(kvm->opaque, env);
}

int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu)
{
    return vcpu->run->if_flag;
}

int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu)
{
    return vcpu->run->ready_for_interrupt_injection;
}
int kvm_run(kvm_vcpu_context_t vcpu, void *env)
{
    int r;
    int fd = vcpu->fd;
    struct kvm_run *run = vcpu->run;
    kvm_context_t kvm = vcpu->kvm;

again:
    push_nmi(kvm);
#if !defined(__s390__)
    if (!kvm->irqchip_in_kernel)
        run->request_interrupt_window = try_push_interrupts(kvm);
#endif
    r = pre_kvm_run(kvm, env);
    if (r)
        return r;
    r = ioctl(fd, KVM_RUN, 0);

    if (r == -1 && errno != EINTR && errno != EAGAIN) {
        r = -errno;
        post_kvm_run(kvm, env);
        fprintf(stderr, "kvm_run: %s\n", strerror(-r));
        return r;
    }

    post_kvm_run(kvm, env);

#if defined(KVM_CAP_COALESCED_MMIO)
    if (kvm->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring = (void *)run +
            kvm->coalesced_mmio * PAGE_SIZE;
        while (ring->first != ring->last) {
            kvm->callbacks->mmio_write(kvm->opaque,
                                       ring->coalesced_mmio[ring->first].phys_addr,
                                       &ring->coalesced_mmio[ring->first].data[0],
                                       ring->coalesced_mmio[ring->first].len);
            smp_wmb();
            ring->first = (ring->first + 1) %
                KVM_COALESCED_MMIO_MAX;
        }
    }
#endif

#if !defined(__s390__)
    if (r == -1) {
        r = handle_io_window(kvm);
        goto more;
    }
#endif
    if (1) {
        switch (run->exit_reason) {
        case KVM_EXIT_UNKNOWN:
            r = kvm->callbacks->unhandled(kvm, vcpu,
                                          run->hw.hardware_exit_reason);
            break;
        case KVM_EXIT_FAIL_ENTRY:
            r = kvm->callbacks->unhandled(kvm, vcpu,
                                          run->fail_entry.hardware_entry_failure_reason);
            break;
        case KVM_EXIT_EXCEPTION:
            fprintf(stderr, "exception %d (%x)\n",
                    run->ex.exception,
                    run->ex.error_code);
            kvm_show_regs(vcpu);
            kvm_show_code(vcpu);
            abort();
            break;
        case KVM_EXIT_IO:
            r = handle_io(vcpu);
            break;
        case KVM_EXIT_DEBUG:
            r = handle_debug(vcpu, env);
            break;
        case KVM_EXIT_MMIO:
            r = handle_mmio(vcpu);
            break;
        case KVM_EXIT_HLT:
            r = handle_halt(vcpu);
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            break;
        case KVM_EXIT_SHUTDOWN:
            r = handle_shutdown(kvm, env);
            break;
#if defined(__s390__)
        case KVM_EXIT_S390_SIEIC:
            r = kvm->callbacks->s390_handle_intercept(kvm, vcpu,
                                                      run);
            break;
        case KVM_EXIT_S390_RESET:
            r = kvm->callbacks->s390_handle_reset(kvm, vcpu, run);
            break;
#endif
        default:
            if (kvm_arch_run(vcpu)) {
                fprintf(stderr, "unhandled vm exit: 0x%x\n",
                        run->exit_reason);
                kvm_show_regs(vcpu);
                abort();
            }
            break;
        }
    }
more:
    if (!r)
        goto again;
    return r;
}
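/*
 * Illustrative usage sketch (not compiled): the outer vcpu loop a client
 * typically builds on kvm_run(). kvm_run() only returns once a handler or
 * an error produced a nonzero result; "env" is whatever per-vcpu state the
 * registered callbacks expect.
 */
#if 0
static void example_vcpu_loop(kvm_vcpu_context_t vcpu, void *env)
{
    for (;;) {
        int r = kvm_run(vcpu, env);

        if (r < 0)
            break;      /* unrecoverable error, give up */
        /* r > 0: a callback (e.g. the io_window handler) asked to
         * return to the caller; service events and re-enter. */
    }
}
#endif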
int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq)
{
    struct kvm_interrupt intr;

    intr.irq = irq;
    return ioctl(vcpu->fd, KVM_INTERRUPT, &intr);
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg)
{
    return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg);
}
#endif
int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset) {
        r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL);
        if (r == -1)
            r = -errno;
        return r;
    }
    sigmask = malloc(sizeof(*sigmask) + sizeof(*sigset));
    if (!sigmask)
        return -ENOMEM;

    sigmask->len = 8;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask);
    if (r == -1)
        r = -errno;
    free(sigmask);
    return r;
}
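/*
 * Illustrative usage sketch (not compiled, assumes <signal.h> and
 * <pthread.h>): unblocking SIGUSR1 only while inside KVM_RUN, the usual way
 * to let another thread kick a vcpu out of the kernel with a signal.
 */
#if 0
static int example_unblock_ipi_in_kvm_run(kvm_vcpu_context_t vcpu)
{
    sigset_t set;

    /* start from the thread's current (blocking) mask ... */
    pthread_sigmask(SIG_BLOCK, NULL, &set);
    /* ... and let SIGUSR1 through only for the duration of KVM_RUN */
    sigdelset(&set, SIGUSR1);
    return kvm_set_signal_mask(vcpu, &set);
}
#endif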
int kvm_irqchip_in_kernel(kvm_context_t kvm)
{
    return kvm->irqchip_in_kernel;
}

int kvm_pit_in_kernel(kvm_context_t kvm)
{
    return kvm->pit_in_kernel;
}

int kvm_has_sync_mmu(kvm_context_t kvm)
{
    int r = 0;
#ifdef KVM_CAP_SYNC_MMU
    r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
#endif
    return r;
}

int kvm_inject_nmi(kvm_vcpu_context_t vcpu)
{
#ifdef KVM_CAP_USER_NMI
    return ioctl(vcpu->fd, KVM_NMI);
#else
    return -ENOSYS;
#endif
}
int kvm_init_coalesced_mmio(kvm_context_t kvm)
{
    int r = 0;
    kvm->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
    if (r > 0) {
        kvm->coalesced_mmio = r;
        return 0;
    }
#endif
    return r;
}
int kvm_register_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_zone zone;
    int r;

    if (kvm->coalesced_mmio) {
        zone.addr = addr;
        zone.size = size;

        r = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
        if (r == -1) {
            perror("kvm_register_coalesced_mmio_zone");
            return -errno;
        }
        return 0;
    }
#endif
    return -ENOSYS;
}

int kvm_unregister_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_zone zone;
    int r;

    if (kvm->coalesced_mmio) {
        zone.addr = addr;
        zone.size = size;

        r = ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone);
        if (r == -1) {
            perror("kvm_unregister_coalesced_mmio_zone");
            return -errno;
        }
        DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size);
        return 0;
    }
#endif
    return -ENOSYS;
}
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
int kvm_assign_pci_device(kvm_context_t kvm,
                          struct kvm_assigned_pci_dev *assigned_dev)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
    if (ret < 0)
        return -errno;

    return ret;
}

static int kvm_old_assign_irq(kvm_context_t kvm,
                              struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq);
    if (ret < 0)
        return -errno;

    return ret;
}

#ifdef KVM_CAP_ASSIGN_DEV_IRQ
int kvm_assign_irq(kvm_context_t kvm,
                   struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
    if (ret > 0) {
        ret = ioctl(kvm->vm_fd, KVM_ASSIGN_DEV_IRQ, assigned_irq);
        if (ret < 0)
            return -errno;
        return ret;
    }

    return kvm_old_assign_irq(kvm, assigned_irq);
}

int kvm_deassign_irq(kvm_context_t kvm,
                     struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
    if (ret < 0)
        return -errno;

    return ret;
}
#else
int kvm_assign_irq(kvm_context_t kvm,
                   struct kvm_assigned_irq *assigned_irq)
{
    return kvm_old_assign_irq(kvm, assigned_irq);
}
#endif
#endif
#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
int kvm_deassign_pci_device(kvm_context_t kvm,
                            struct kvm_assigned_pci_dev *assigned_dev)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
    if (ret < 0)
        return -errno;

    return ret;
}
#endif
int kvm_destroy_memory_region_works(kvm_context_t kvm)
{
    int ret = 0;

#ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
    ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
                KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
    if (ret <= 0)
        ret = 0;
#endif
    return ret;
}
int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
{
#ifdef KVM_CAP_REINJECT_CONTROL
    int r;
    struct kvm_reinject_control control;

    control.pit_reinject = pit_reinject;

    r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
    if (r > 0) {
        r = ioctl(kvm->vm_fd, KVM_REINJECT_CONTROL, &control);
        if (r == -1)
            return -errno;
        return r;
    }
#endif
    return -ENOSYS;
}
int kvm_has_gsi_routing(kvm_context_t kvm)
{
    int r = 0;

#ifdef KVM_CAP_IRQ_ROUTING
    r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
#endif
    return r;
}

int kvm_get_gsi_count(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
#else
    return -EINVAL;
#endif
}
int kvm_clear_gsi_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes->nr = 0;
    return 0;
#else
    return -EINVAL;
#endif
}
int kvm_add_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing *z;
    struct kvm_irq_routing_entry *new;
    int n, size;

    if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
        n = kvm->nr_allocated_irq_routes * 2;
        if (n < 64)
            n = 64;
        size = sizeof(struct kvm_irq_routing);
        size += n * sizeof(*new);
        z = realloc(kvm->irq_routes, size);
        if (!z)
            return -ENOMEM;
        kvm->nr_allocated_irq_routes = n;
        kvm->irq_routes = z;
    }
    n = kvm->irq_routes->nr++;
    new = &kvm->irq_routes->entries[n];
    memset(new, 0, sizeof(*new));
    new->gsi = entry->gsi;
    new->type = entry->type;
    new->flags = entry->flags;
    new->u = entry->u;

    set_gsi(kvm, entry->gsi);

    return 0;
#else
    return -ENOSYS;
#endif
}
int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_add_routing_entry(kvm, &e);
#else
    return -ENOSYS;
#endif
}
int kvm_del_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry *e, *p;
    int i, gsi, found = 0;

    gsi = entry->gsi;

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        e = &kvm->irq_routes->entries[i];
        if (e->type == entry->type
            && e->gsi == gsi) {
            switch (e->type)
            {
            case KVM_IRQ_ROUTING_IRQCHIP: {
                if (e->u.irqchip.irqchip ==
                    entry->u.irqchip.irqchip
                    && e->u.irqchip.pin ==
                    entry->u.irqchip.pin) {
                    p = &kvm->irq_routes->
                        entries[--kvm->irq_routes->nr];
                    *e = *p;
                    found = 1;
                }
                break;
            }
            case KVM_IRQ_ROUTING_MSI: {
                if (e->u.msi.address_lo ==
                    entry->u.msi.address_lo
                    && e->u.msi.address_hi ==
                    entry->u.msi.address_hi
                    && e->u.msi.data == entry->u.msi.data) {
                    p = &kvm->irq_routes->
                        entries[--kvm->irq_routes->nr];
                    *e = *p;
                    found = 1;
                }
                break;
            }
            default:
                break;
            }
            if (found) {
                /* If there are no other users of this GSI
                 * mark it available in the bitmap */
                for (i = 0; i < kvm->irq_routes->nr; i++) {
                    e = &kvm->irq_routes->entries[i];
                    if (e->gsi == gsi)
                        break;
                }
                if (i == kvm->irq_routes->nr)
                    clear_gsi(kvm, gsi);

                return 0;
            }
        }
    }
    return -ESRCH;
#else
    return -ENOSYS;
#endif
}
int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_del_routing_entry(kvm, &e);
#else
    return -ENOSYS;
#endif
}
int kvm_commit_irq_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    int r;

    kvm->irq_routes->flags = 0;
    r = ioctl(kvm->vm_fd, KVM_SET_GSI_ROUTING, kvm->irq_routes);
    if (r == -1)
        r = -errno;
    return r;
#else
    return -ENOSYS;
#endif
}
int kvm_get_irq_route_gsi(kvm_context_t kvm)
{
    int i, bit;
    uint32_t *buf = kvm->used_gsi_bitmap;

    /* Return the lowest unused GSI in the bitmap */
    for (i = 0; i < kvm->max_gsi / 32; i++) {
        bit = ffs(~buf[i]);
        if (!bit)
            continue;

        return bit - 1 + i * 32;
    }

    return -ENOSPC;
}
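/*
 * Illustrative usage sketch (not compiled): allocating a free GSI, routing
 * it to a pin of an in-kernel irqchip, and pushing the table to the kernel.
 * The irqchip/pin values here are hypothetical.
 */
#if 0
static int example_route_irq(kvm_context_t kvm)
{
    int r, gsi = kvm_get_irq_route_gsi(kvm);

    if (gsi < 0)
        return gsi;
    r = kvm_add_irq_route(kvm, gsi, 0, 3);
    if (r < 0)
        return r;
    return kvm_commit_irq_routes(kvm);
}
#endif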
#ifdef KVM_CAP_DEVICE_MSIX
int kvm_assign_set_msix_nr(kvm_context_t kvm,
                           struct kvm_assigned_msix_nr *msix_nr)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
    if (ret < 0)
        return -errno;

    return ret;
}

int kvm_assign_set_msix_entry(kvm_context_t kvm,
                              struct kvm_assigned_msix_entry *entry)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
    if (ret < 0)
        return -errno;

    return ret;
}
#endif
#if defined(KVM_CAP_IRQFD) && defined(CONFIG_eventfd)

#include <sys/eventfd.h>

static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
{
    int r;
    struct kvm_irqfd data = {
        .fd = fd,
        .gsi = gsi,
        .flags = flags,
    };

    r = ioctl(kvm->vm_fd, KVM_IRQFD, &data);
    if (r == -1)
        r = -errno;
    return r;
}

int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
{
    int r;
    int fd;

    if (!kvm_check_extension(kvm, KVM_CAP_IRQFD))
        return -ENOENT;

    fd = eventfd(0, 0);
    if (fd < 0)
        return -errno;

    r = _kvm_irqfd(kvm, fd, gsi, 0);
    if (r < 0) {
        close(fd);
        return r;
    }

    return fd;
}
#else /* KVM_CAP_IRQFD */

int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
{
    return -ENOSYS;
}

#endif /* KVM_CAP_IRQFD */
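/*
 * Illustrative usage sketch (not compiled): binding an eventfd to a GSI so
 * that a write to the returned fd injects the interrupt without going
 * through an ioctl on the vm fd.
 */
#if 0
static void example_kick_gsi(kvm_context_t kvm, int gsi)
{
    int fd = kvm_irqfd(kvm, gsi, 0);
    uint64_t one = 1;

    if (fd >= 0) {
        if (write(fd, &one, sizeof(one)) < 0)   /* raises the GSI */
            perror("eventfd write");
        close(fd);
    }
}
#endif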