kvm/libkvm/libkvm.c
/*
 * Kernel-based Virtual Machine control library
 *
 * This library provides an API to control the kvm hardware virtualization
 * module.
 *
 * Copyright (C) 2006 Qumranet
 *
 * Authors:
 *
 *      Avi Kivity <avi@qumranet.com>
 *      Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the GNU LGPL license, version 2.
 */

#ifndef __user
#define __user /* temporary, until installed via make headers_install */
#endif
#include <linux/kvm.h>

#define EXPECTED_KVM_API_VERSION 12

#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
#error libkvm: userspace and kernel version mismatch
#endif
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <inttypes.h>
#include "libkvm.h"

#if defined(__x86_64__) || defined(__i386__)
#include "kvm-x86.h"
#endif

#if defined(__ia64__)
#include "kvm-ia64.h"
#endif

#if defined(__powerpc__)
#include "kvm-powerpc.h"
#endif

#if defined(__s390__)
#include "kvm-s390.h"
#endif

//#define DEBUG_MEMREG
#ifdef DEBUG_MEMREG
#define DPRINTF(fmt, args...) \
        do { fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ##args); } while (0)
#else
#define DPRINTF(fmt, args...) do {} while (0)
#endif

#define MIN(x,y) ((x) < (y) ? (x) : (y))
#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))

int kvm_abi = EXPECTED_KVM_API_VERSION;
int kvm_page_size;
static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
{
        uint32_t *bitmap = kvm->used_gsi_bitmap;

        if (gsi < kvm->max_gsi)
                bitmap[gsi / 32] |= 1U << (gsi % 32);
        else
                DPRINTF("Invalid GSI %u\n", gsi);
}

static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
{
        uint32_t *bitmap = kvm->used_gsi_bitmap;

        if (gsi < kvm->max_gsi)
                bitmap[gsi / 32] &= ~(1U << (gsi % 32));
        else
                DPRINTF("Invalid GSI %u\n", gsi);
}
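/*
 * Worked example (illustrative, not from the original source): GSI 37
 * falls in word 37 / 32 = 1 at bit 37 % 32 = 5, so set_gsi() ORs
 * 1U << 5 (0x20) into bitmap[1] and clear_gsi() masks that same bit off.
 */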
struct slot_info {
        unsigned long phys_addr;
        unsigned long len;
        unsigned long userspace_addr;
        unsigned flags;
        int logging_count;
};

struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];

static void init_slots(void)
{
        int i;

        for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
                slots[i].len = 0;
}
static int get_free_slot(kvm_context_t kvm)
{
        int i;
        int tss_ext;

#if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
        tss_ext = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
#else
        tss_ext = 0;
#endif

        /*
         * On older kernels where the set tss ioctl is not supported we must
         * save slot 0 to hold the extended memory, as the vmx will use the
         * last 3 pages of this slot.
         */
        if (tss_ext > 0)
                i = 0;
        else
                i = 1;

        for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
                if (!slots[i].len)
                        return i;
        return -1;
}
static void register_slot(int slot, unsigned long phys_addr, unsigned long len,
                          unsigned long userspace_addr, unsigned flags)
{
        slots[slot].phys_addr = phys_addr;
        slots[slot].len = len;
        slots[slot].userspace_addr = userspace_addr;
        slots[slot].flags = flags;
}

static void free_slot(int slot)
{
        slots[slot].len = 0;
        slots[slot].logging_count = 0;
}
static int get_slot(unsigned long phys_addr)
{
        int i;

        for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
                if (slots[i].len && slots[i].phys_addr <= phys_addr &&
                    (slots[i].phys_addr + slots[i].len - 1) >= phys_addr)
                        return i;
        }
        return -1;
}

/* Returns -1 if this region is not totally contained in any slot,
 * and the number of the containing slot otherwise */
static int get_container_slot(uint64_t phys_addr, unsigned long size)
{
        int i;

        for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
                if (slots[i].len && slots[i].phys_addr <= phys_addr &&
                    (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
                        return i;
        return -1;
}

int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size)
{
        int slot = get_container_slot(phys_addr, size);
        if (slot == -1)
                return 0;
        return 1;
}
/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(kvm_context_t kvm,
                                      unsigned long phys_addr,
                                      unsigned flags,
                                      unsigned mask)
{
        int r = -1;
        int slot = get_slot(phys_addr);

        if (slot == -1) {
                fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
                return 1;
        }

        flags = (slots[slot].flags & ~mask) | flags;
        if (flags == slots[slot].flags)
                return 0;
        slots[slot].flags = flags;

        {
                struct kvm_userspace_memory_region mem = {
                        .slot = slot,
                        .memory_size = slots[slot].len,
                        .guest_phys_addr = slots[slot].phys_addr,
                        .userspace_addr = slots[slot].userspace_addr,
                        .flags = slots[slot].flags,
                };

                DPRINTF("slot %d start %llx len %llx flags %x\n",
                        mem.slot,
                        mem.guest_phys_addr,
                        mem.memory_size,
                        mem.flags);
                r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
                if (r == -1)
                        fprintf(stderr, "%s: %m\n", __FUNCTION__);
        }
        return r;
}
static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
                                          int (*change)(kvm_context_t kvm,
                                                        uint64_t start,
                                                        uint64_t len))
{
        int i, r;

        for (i = r = 0; i < KVM_MAX_NUM_MEM_REGIONS && r == 0; i++) {
                if (slots[i].len)
                        r = change(kvm, slots[i].phys_addr, slots[i].len);
        }
        return r;
}
int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm,
                                    uint64_t phys_addr,
                                    uint64_t len)
{
        int slot = get_slot(phys_addr);

        DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len);
        if (slot == -1) {
                fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
                return -EINVAL;
        }

        if (slots[slot].logging_count++)
                return 0;

        return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
                                          KVM_MEM_LOG_DIRTY_PAGES,
                                          KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm,
                                     uint64_t phys_addr,
                                     uint64_t len)
{
        int slot = get_slot(phys_addr);

        if (slot == -1) {
                fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
                return -EINVAL;
        }

        if (--slots[slot].logging_count)
                return 0;

        return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
                                          0,
                                          KVM_MEM_LOG_DIRTY_PAGES);
}
/*
 * Enable dirty page logging for all memory regions
 */
int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
{
        if (kvm->dirty_pages_log_all)
                return 0;
        kvm->dirty_pages_log_all = 1;
        return kvm_dirty_pages_log_change_all(kvm,
                                              kvm_dirty_pages_log_enable_slot);
}

/*
 * Enable dirty page logging only for memory regions that were created with
 * dirty logging enabled (disable for all other memory regions).
 */
int kvm_dirty_pages_log_reset(kvm_context_t kvm)
{
        if (!kvm->dirty_pages_log_all)
                return 0;
        kvm->dirty_pages_log_all = 0;
        return kvm_dirty_pages_log_change_all(kvm,
                                              kvm_dirty_pages_log_disable_slot);
}
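/*
 * Illustrative sketch of one dirty-tracking round (hypothetical caller
 * code; "bitmap" and the guest address 0 are assumptions, not libkvm
 * API): enable logging everywhere, let the guest run, harvest a slot's
 * bitmap with kvm_get_dirty_pages() (defined below), then restore the
 * per-slot logging settings.
 *
 *      kvm_dirty_pages_log_enable_all(kvm);
 *      // ... run the guest for a while ...
 *      kvm_get_dirty_pages(kvm, 0, bitmap);   // slot containing GPA 0
 *      kvm_dirty_pages_log_reset(kvm);
 */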
kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
                       void *opaque)
{
        int fd;
        kvm_context_t kvm;
        int r, gsi_count;

        fd = open("/dev/kvm", O_RDWR);
        if (fd == -1) {
                perror("open /dev/kvm");
                return NULL;
        }
        r = ioctl(fd, KVM_GET_API_VERSION, 0);
        if (r == -1) {
                fprintf(stderr, "kvm kernel version too old: "
                        "KVM_GET_API_VERSION ioctl not supported\n");
                goto out_close;
        }
        if (r < EXPECTED_KVM_API_VERSION) {
                fprintf(stderr, "kvm kernel version too old: "
                        "We expect API version %d or newer, but got "
                        "version %d\n",
                        EXPECTED_KVM_API_VERSION, r);
                goto out_close;
        }
        if (r > EXPECTED_KVM_API_VERSION) {
                fprintf(stderr, "kvm userspace version too old\n");
                goto out_close;
        }
        kvm_abi = r;
        kvm_page_size = getpagesize();
        kvm = malloc(sizeof(*kvm));
        if (kvm == NULL)
                goto out_close;
        memset(kvm, 0, sizeof(*kvm));
        kvm->fd = fd;
        kvm->vm_fd = -1;
        kvm->callbacks = callbacks;
        kvm->opaque = opaque;
        kvm->dirty_pages_log_all = 0;
        kvm->no_irqchip_creation = 0;
        kvm->no_pit_creation = 0;

        gsi_count = kvm_get_gsi_count(kvm);
        if (gsi_count > 0) {
                int gsi_bits, i;

                /* Round up so we can search ints using ffs */
                gsi_bits = ALIGN(gsi_count, 32);
                kvm->used_gsi_bitmap = malloc(gsi_bits / 8);
                if (!kvm->used_gsi_bitmap)
                        goto out_close;
                memset(kvm->used_gsi_bitmap, 0, gsi_bits / 8);
                kvm->max_gsi = gsi_bits;

                /* Mark any over-allocated bits as already in use */
                for (i = gsi_count; i < gsi_bits; i++)
                        set_gsi(kvm, i);
        }

        return kvm;
out_close:
        close(fd);
        return NULL;
}
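/*
 * Typical bring-up, as an illustrative sketch only ("my_callbacks",
 * "env" and the 128 MB size are caller-supplied assumptions, not
 * libkvm API):
 *
 *      void *vm_mem;
 *      kvm_context_t kvm = kvm_init(&my_callbacks, NULL);
 *
 *      if (!kvm)
 *              exit(1);
 *      if (kvm_create(kvm, 128 << 20, &vm_mem) < 0 ||
 *          kvm_create_vcpu(kvm, 0) < 0)
 *              exit(1);
 *      for (;;)
 *              kvm_run(kvm, 0, env);   // dispatches to the callbacks
 */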
void kvm_finalize(kvm_context_t kvm)
{
        if (kvm->vcpu_fd[0] != -1)
                close(kvm->vcpu_fd[0]);
        if (kvm->vm_fd != -1)
                close(kvm->vm_fd);
        close(kvm->fd);
        free(kvm);
}

void kvm_disable_irqchip_creation(kvm_context_t kvm)
{
        kvm->no_irqchip_creation = 1;
}

void kvm_disable_pit_creation(kvm_context_t kvm)
{
        kvm->no_pit_creation = 1;
}
int kvm_create_vcpu(kvm_context_t kvm, int slot)
{
        long mmap_size;
        int r;

        r = ioctl(kvm->vm_fd, KVM_CREATE_VCPU, slot);
        if (r == -1) {
                r = -errno;
                fprintf(stderr, "kvm_create_vcpu: %m\n");
                return r;
        }
        kvm->vcpu_fd[slot] = r;
        mmap_size = ioctl(kvm->fd, KVM_GET_VCPU_MMAP_SIZE, 0);
        if (mmap_size == -1) {
                r = -errno;
                fprintf(stderr, "get vcpu mmap size: %m\n");
                return r;
        }
        kvm->run[slot] = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED,
                              kvm->vcpu_fd[slot], 0);
        if (kvm->run[slot] == MAP_FAILED) {
                r = -errno;
                fprintf(stderr, "mmap vcpu area: %m\n");
                return r;
        }
        return 0;
}
int kvm_create_vm(kvm_context_t kvm)
{
        int fd = kvm->fd;

#ifdef KVM_CAP_IRQ_ROUTING
        kvm->irq_routes = malloc(sizeof(*kvm->irq_routes));
        if (!kvm->irq_routes)
                return -ENOMEM;
        memset(kvm->irq_routes, 0, sizeof(*kvm->irq_routes));
        kvm->nr_allocated_irq_routes = 0;
#endif

        kvm->vcpu_fd[0] = -1;

        fd = ioctl(fd, KVM_CREATE_VM, 0);
        if (fd == -1) {
                fprintf(stderr, "kvm_create_vm: %m\n");
                return -1;
        }
        kvm->vm_fd = fd;
        return 0;
}
static int kvm_create_default_phys_mem(kvm_context_t kvm,
                                       unsigned long phys_mem_bytes,
                                       void **vm_mem)
{
#ifdef KVM_CAP_USER_MEMORY
        int r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
        if (r > 0)
                return 0;
        fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
#else
#error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
#endif
        return -1;
}
int kvm_check_extension(kvm_context_t kvm, int ext)
{
        int ret;

        ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, ext);
        if (ret > 0)
                return ret;
        return 0;
}
void kvm_create_irqchip(kvm_context_t kvm)
{
        int r;

        kvm->irqchip_in_kernel = 0;
#ifdef KVM_CAP_IRQCHIP
        if (!kvm->no_irqchip_creation) {
                r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
                if (r > 0) {    /* kernel irqchip supported */
                        r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
                        if (r >= 0) {
                                kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
#if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
                                r = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
                                          KVM_CAP_IRQ_INJECT_STATUS);
                                if (r > 0)
                                        kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
#endif
                                kvm->irqchip_in_kernel = 1;
                        }
                        else
                                fprintf(stderr, "Create kernel PIC irqchip failed\n");
                }
        }
#endif
}
int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
{
        int r;

        r = kvm_create_vm(kvm);
        if (r < 0)
                return r;
        r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
        if (r < 0)
                return r;
        init_slots();
        r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
        if (r < 0)
                return r;
        kvm_create_irqchip(kvm);

        return 0;
}
void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len, int log, int writable)
{
        int r;
        int prot = PROT_READ;
        void *ptr;
        struct kvm_userspace_memory_region memory = {
                .memory_size = len,
                .guest_phys_addr = phys_start,
                .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
        };

        if (writable)
                prot |= PROT_WRITE;

#if !defined(__s390__)
        ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
#else
        ptr = mmap(LIBKVM_S390_ORIGIN, len, prot | PROT_EXEC,
                   MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
#endif
        if (ptr == MAP_FAILED) {
                fprintf(stderr, "%s: %s", __func__, strerror(errno));
                return 0;
        }

        memset(ptr, 0, len);

        memory.userspace_addr = (unsigned long)ptr;
        memory.slot = get_free_slot(kvm);
        DPRINTF("slot %d start %llx len %llx flags %x\n",
                memory.slot,
                memory.guest_phys_addr,
                memory.memory_size,
                memory.flags);
        r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
        if (r == -1) {
                fprintf(stderr, "%s: %s", __func__, strerror(errno));
                return 0;
        }
        register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
                      memory.userspace_addr, memory.flags);

        return ptr;
}
int kvm_register_phys_mem(kvm_context_t kvm,
                          unsigned long phys_start, void *userspace_addr,
                          unsigned long len, int log)
{
        struct kvm_userspace_memory_region memory = {
                .memory_size = len,
                .guest_phys_addr = phys_start,
                .userspace_addr = (unsigned long)(intptr_t)userspace_addr,
                .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
        };
        int r;

        memory.slot = get_free_slot(kvm);
        DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n",
                memory.guest_phys_addr, memory.memory_size,
                memory.userspace_addr, memory.slot, memory.flags);
        r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
        if (r == -1) {
                fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(errno));
                return -1;
        }
        register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
                      memory.userspace_addr, memory.flags);
        return 0;
}
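/*
 * Illustrative sketch (addresses and sizes are assumptions): back 16 MB
 * of guest-physical space at 1 MB with caller-allocated anonymous
 * memory, without dirty logging.
 *
 *      size_t len = 16 << 20;
 *      void *ram = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *      if (ram != MAP_FAILED)
 *              kvm_register_phys_mem(kvm, 0x100000, ram, len, 0);
 */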
/* destroy/free a whole slot.
 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
 */
void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len)
{
        int slot;
        int r;
        struct kvm_userspace_memory_region memory = {
                .memory_size = 0,
                .guest_phys_addr = phys_start,
                .userspace_addr = 0,
                .flags = 0,
        };

        slot = get_slot(phys_start);

        if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
                fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
                        __FUNCTION__, slot);
                return;
        }
        if (phys_start != slots[slot].phys_addr) {
                fprintf(stderr,
                        "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
                        __FUNCTION__, phys_start, slots[slot].phys_addr);
                phys_start = slots[slot].phys_addr;
        }

        memory.slot = slot;
        DPRINTF("slot %d start %llx len %llx flags %x\n",
                memory.slot,
                memory.guest_phys_addr,
                memory.memory_size,
                memory.flags);
        r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
        if (r == -1) {
                fprintf(stderr, "destroy_userspace_phys_mem: %s",
                        strerror(errno));
                return;
        }

        free_slot(memory.slot);
}
void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size)
{
        int slot = get_container_slot(phys_addr, size);

        if (slot != -1) {
                DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size);
                kvm_destroy_phys_mem(kvm, phys_addr, size);
        }
}
static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
{
        int r;
        struct kvm_dirty_log log = {
                .slot = slot,
        };

        log.dirty_bitmap = buf;

        r = ioctl(kvm->vm_fd, ioctl_num, &log);
        if (r == -1)
                return -errno;
        return 0;
}
int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
{
        int slot;

        slot = get_slot(phys_addr);
        return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
}

int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
                              unsigned long len, void *buf, void *opaque,
                              int (*cb)(unsigned long start, unsigned long len,
                                        void *bitmap, void *opaque))
{
        int i;
        int r;
        unsigned long end_addr = phys_addr + len;

        for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
                if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr)
                    && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) {
                        r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
                        if (r)
                                return r;
                        r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
                        if (r)
                                return r;
                }
        }
        return 0;
}
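/*
 * Illustrative callback for kvm_get_dirty_pages_range() (the function
 * name and the popcount loop are assumptions for demonstration): count
 * dirty pages in a slot, one bit per page in the harvested bitmap.
 *
 *      static int count_dirty_cb(unsigned long start, unsigned long len,
 *                                void *bitmap, void *opaque)
 *      {
 *              unsigned long *count = opaque;
 *              unsigned long i, npages = len / kvm_page_size;
 *              uint8_t *map = bitmap;
 *
 *              for (i = 0; i < npages; ++i)
 *                      if (map[i / 8] & (1 << (i % 8)))
 *                              ++*count;
 *              return 0;
 *      }
 */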
#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
{
        struct kvm_irq_level event;
        int r;

        if (!kvm->irqchip_in_kernel)
                return 0;
        event.level = level;
        event.irq = irq;
        r = ioctl(kvm->vm_fd, kvm->irqchip_inject_ioctl, &event);
        if (r == -1)
                perror("kvm_set_irq_level");

        if (status) {
#ifdef KVM_CAP_IRQ_INJECT_STATUS
                *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ?
                        1 : event.status;
#else
                *status = 1;
#endif
        }

        return 1;
}

int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
        int r;

        if (!kvm->irqchip_in_kernel)
                return 0;
        r = ioctl(kvm->vm_fd, KVM_GET_IRQCHIP, chip);
        if (r == -1) {
                r = -errno;
                perror("kvm_get_irqchip");
        }
        return r;
}

int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
        int r;

        if (!kvm->irqchip_in_kernel)
                return 0;
        r = ioctl(kvm->vm_fd, KVM_SET_IRQCHIP, chip);
        if (r == -1) {
                r = -errno;
                perror("kvm_set_irqchip");
        }
        return r;
}

#endif
static int handle_io(kvm_context_t kvm, struct kvm_run *run, int vcpu)
{
        uint16_t addr = run->io.port;
        int r;
        int i;
        void *p = (void *)run + run->io.data_offset;

        for (i = 0; i < run->io.count; ++i) {
                switch (run->io.direction) {
                case KVM_EXIT_IO_IN:
                        switch (run->io.size) {
                        case 1:
                                r = kvm->callbacks->inb(kvm->opaque, addr, p);
                                break;
                        case 2:
                                r = kvm->callbacks->inw(kvm->opaque, addr, p);
                                break;
                        case 4:
                                r = kvm->callbacks->inl(kvm->opaque, addr, p);
                                break;
                        default:
                                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                                return -EMSGSIZE;
                        }
                        break;
                case KVM_EXIT_IO_OUT:
                        switch (run->io.size) {
                        case 1:
                                r = kvm->callbacks->outb(kvm->opaque, addr,
                                                         *(uint8_t *)p);
                                break;
                        case 2:
                                r = kvm->callbacks->outw(kvm->opaque, addr,
                                                         *(uint16_t *)p);
                                break;
                        case 4:
                                r = kvm->callbacks->outl(kvm->opaque, addr,
                                                         *(uint32_t *)p);
                                break;
                        default:
                                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                                return -EMSGSIZE;
                        }
                        break;
                default:
                        fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
                        return -EPROTO;
                }

                p += run->io.size;
        }

        return 0;
}
int handle_debug(kvm_context_t kvm, int vcpu, void *env)
{
#ifdef KVM_CAP_SET_GUEST_DEBUG
        struct kvm_run *run = kvm->run[vcpu];

        return kvm->callbacks->debug(kvm->opaque, env, &run->debug.arch);
#else
        return 0;
#endif
}
int kvm_get_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_REGS, regs);
}

int kvm_set_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_REGS, regs);
}

int kvm_get_fpu(kvm_context_t kvm, int vcpu, struct kvm_fpu *fpu)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_FPU, fpu);
}

int kvm_set_fpu(kvm_context_t kvm, int vcpu, struct kvm_fpu *fpu)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_FPU, fpu);
}

int kvm_get_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_SREGS, sregs);
}

int kvm_set_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SREGS, sregs);
}
#ifdef KVM_CAP_MP_STATE
int kvm_get_mpstate(kvm_context_t kvm, int vcpu, struct kvm_mp_state *mp_state)
{
        int r;

        r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
        if (r > 0)
                return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_MP_STATE, mp_state);
        return -ENOSYS;
}

int kvm_set_mpstate(kvm_context_t kvm, int vcpu, struct kvm_mp_state *mp_state)
{
        int r;

        r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
        if (r > 0)
                return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_MP_STATE, mp_state);
        return -ENOSYS;
}
#endif
static int handle_mmio(kvm_context_t kvm, struct kvm_run *kvm_run)
{
        unsigned long addr = kvm_run->mmio.phys_addr;
        void *data = kvm_run->mmio.data;

        /* hack: Red Hat 7.1 generates these weird accesses. */
        if ((addr > 0xa0000 - 4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
                return 0;

        if (kvm_run->mmio.is_write)
                return kvm->callbacks->mmio_write(kvm->opaque, addr, data,
                                                  kvm_run->mmio.len);
        else
                return kvm->callbacks->mmio_read(kvm->opaque, addr, data,
                                                 kvm_run->mmio.len);
}
int handle_io_window(kvm_context_t kvm)
{
        return kvm->callbacks->io_window(kvm->opaque);
}

int handle_halt(kvm_context_t kvm, int vcpu)
{
        return kvm->callbacks->halt(kvm->opaque, vcpu);
}

int handle_shutdown(kvm_context_t kvm, void *env)
{
        return kvm->callbacks->shutdown(kvm->opaque, env);
}

int try_push_interrupts(kvm_context_t kvm)
{
        return kvm->callbacks->try_push_interrupts(kvm->opaque);
}

static inline void push_nmi(kvm_context_t kvm)
{
#ifdef KVM_CAP_USER_NMI
        kvm->callbacks->push_nmi(kvm->opaque);
#endif /* KVM_CAP_USER_NMI */
}

void post_kvm_run(kvm_context_t kvm, void *env)
{
        kvm->callbacks->post_kvm_run(kvm->opaque, env);
}

int pre_kvm_run(kvm_context_t kvm, void *env)
{
        return kvm->callbacks->pre_kvm_run(kvm->opaque, env);
}

int kvm_get_interrupt_flag(kvm_context_t kvm, int vcpu)
{
        struct kvm_run *run = kvm->run[vcpu];

        return run->if_flag;
}

int kvm_is_ready_for_interrupt_injection(kvm_context_t kvm, int vcpu)
{
        struct kvm_run *run = kvm->run[vcpu];

        return run->ready_for_interrupt_injection;
}
int kvm_run(kvm_context_t kvm, int vcpu, void *env)
{
        int r;
        int fd = kvm->vcpu_fd[vcpu];
        struct kvm_run *run = kvm->run[vcpu];

again:
        push_nmi(kvm);
#if !defined(__s390__)
        if (!kvm->irqchip_in_kernel)
                run->request_interrupt_window = try_push_interrupts(kvm);
#endif
        r = pre_kvm_run(kvm, env);
        if (r)
                return r;
        r = ioctl(fd, KVM_RUN, 0);

        if (r == -1 && errno != EINTR && errno != EAGAIN) {
                r = -errno;
                post_kvm_run(kvm, env);
                fprintf(stderr, "kvm_run: %s\n", strerror(-r));
                return r;
        }

        post_kvm_run(kvm, env);

#if defined(KVM_CAP_COALESCED_MMIO)
        if (kvm->coalesced_mmio) {
                struct kvm_coalesced_mmio_ring *ring = (void *)run +
                        kvm->coalesced_mmio * PAGE_SIZE;
                while (ring->first != ring->last) {
                        kvm->callbacks->mmio_write(kvm->opaque,
                                ring->coalesced_mmio[ring->first].phys_addr,
                                &ring->coalesced_mmio[ring->first].data[0],
                                ring->coalesced_mmio[ring->first].len);
                        smp_wmb();
                        ring->first = (ring->first + 1) %
                                KVM_COALESCED_MMIO_MAX;
                }
        }
#endif

#if !defined(__s390__)
        if (r == -1) {
                r = handle_io_window(kvm);
                goto more;
        }
#endif
        if (1) {
                switch (run->exit_reason) {
                case KVM_EXIT_UNKNOWN:
                        fprintf(stderr, "unhandled vm exit: 0x%x vcpu_id %d\n",
                                (unsigned)run->hw.hardware_exit_reason, vcpu);
                        kvm_show_regs(kvm, vcpu);
                        abort();
                        break;
                case KVM_EXIT_FAIL_ENTRY:
                        fprintf(stderr, "kvm_run: failed entry, reason %u\n",
                                (unsigned)run->fail_entry.hardware_entry_failure_reason & 0xffff);
                        kvm_show_regs(kvm, vcpu);
                        return -ENOEXEC;
                        break;
                case KVM_EXIT_EXCEPTION:
                        fprintf(stderr, "exception %d (%x)\n",
                                run->ex.exception,
                                run->ex.error_code);
                        kvm_show_regs(kvm, vcpu);
                        kvm_show_code(kvm, vcpu);
                        abort();
                        break;
                case KVM_EXIT_IO:
                        r = handle_io(kvm, run, vcpu);
                        break;
                case KVM_EXIT_DEBUG:
                        r = handle_debug(kvm, vcpu, env);
                        break;
                case KVM_EXIT_MMIO:
                        r = handle_mmio(kvm, run);
                        break;
                case KVM_EXIT_HLT:
                        r = handle_halt(kvm, vcpu);
                        break;
                case KVM_EXIT_IRQ_WINDOW_OPEN:
                        break;
                case KVM_EXIT_SHUTDOWN:
                        r = handle_shutdown(kvm, env);
                        break;
#if defined(__s390__)
                case KVM_EXIT_S390_SIEIC:
                        r = kvm->callbacks->s390_handle_intercept(kvm, vcpu,
                                                                  run);
                        break;
                case KVM_EXIT_S390_RESET:
                        r = kvm->callbacks->s390_handle_reset(kvm, vcpu, run);
                        break;
#endif
                default:
                        if (kvm_arch_run(run, kvm, vcpu)) {
                                fprintf(stderr, "unhandled vm exit: 0x%x\n",
                                        run->exit_reason);
                                kvm_show_regs(kvm, vcpu);
                                abort();
                        }
                        break;
                }
        }
more:
        if (!r)
                goto again;
        return r;
}
int kvm_inject_irq(kvm_context_t kvm, int vcpu, unsigned irq)
{
        struct kvm_interrupt intr;

        intr.irq = irq;
        return ioctl(kvm->vcpu_fd[vcpu], KVM_INTERRUPT, &intr);
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
int kvm_set_guest_debug(kvm_context_t kvm, int vcpu, struct kvm_guest_debug *dbg)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_GUEST_DEBUG, dbg);
}
#endif
int kvm_set_signal_mask(kvm_context_t kvm, int vcpu, const sigset_t *sigset)
{
        struct kvm_signal_mask *sigmask;
        int r;

        if (!sigset) {
                r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SIGNAL_MASK, NULL);
                if (r == -1)
                        r = -errno;
                return r;
        }
        sigmask = malloc(sizeof(*sigmask) + sizeof(*sigset));
        if (!sigmask)
                return -ENOMEM;

        sigmask->len = 8;
        memcpy(sigmask->sigset, sigset, sizeof(*sigset));
        r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SIGNAL_MASK, sigmask);
        if (r == -1)
                r = -errno;
        free(sigmask);
        return r;
}
int kvm_irqchip_in_kernel(kvm_context_t kvm)
{
        return kvm->irqchip_in_kernel;
}

int kvm_pit_in_kernel(kvm_context_t kvm)
{
        return kvm->pit_in_kernel;
}

int kvm_has_sync_mmu(kvm_context_t kvm)
{
        int r = 0;
#ifdef KVM_CAP_SYNC_MMU
        r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
#endif
        return r;
}

int kvm_inject_nmi(kvm_context_t kvm, int vcpu)
{
#ifdef KVM_CAP_USER_NMI
        return ioctl(kvm->vcpu_fd[vcpu], KVM_NMI);
#else
        return -ENOSYS;
#endif
}
int kvm_init_coalesced_mmio(kvm_context_t kvm)
{
        int r = 0;

        kvm->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
        r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
        if (r > 0) {
                kvm->coalesced_mmio = r;
                return 0;
        }
#endif
        return r;
}
int kvm_register_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
        struct kvm_coalesced_mmio_zone zone;
        int r;

        if (kvm->coalesced_mmio) {
                zone.addr = addr;
                zone.size = size;

                r = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
                if (r == -1) {
                        perror("kvm_register_coalesced_mmio_zone");
                        return -errno;
                }
                return 0;
        }
#endif
        return -ENOSYS;
}

int kvm_unregister_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
        struct kvm_coalesced_mmio_zone zone;
        int r;

        if (kvm->coalesced_mmio) {
                zone.addr = addr;
                zone.size = size;

                r = ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone);
                if (r == -1) {
                        perror("kvm_unregister_coalesced_mmio_zone");
                        return -errno;
                }
                DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size);
                return 0;
        }
#endif
        return -ENOSYS;
}
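/*
 * Illustrative sketch (the BAR address and size are assumptions):
 * coalesce writes to a hypothetical framebuffer region so the kernel
 * batches them in the ring drained by kvm_run() above.
 *
 *      if (kvm_register_coalesced_mmio(kvm, 0xf0000000, 0x400000) == 0)
 *              ; // writes to that range now arrive in batches
 */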
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
int kvm_assign_pci_device(kvm_context_t kvm,
                          struct kvm_assigned_pci_dev *assigned_dev)
{
        int ret;

        ret = ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
        if (ret < 0)
                return -errno;

        return ret;
}

static int kvm_old_assign_irq(kvm_context_t kvm,
                              struct kvm_assigned_irq *assigned_irq)
{
        int ret;

        ret = ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq);
        if (ret < 0)
                return -errno;

        return ret;
}

#ifdef KVM_CAP_ASSIGN_DEV_IRQ
int kvm_assign_irq(kvm_context_t kvm,
                   struct kvm_assigned_irq *assigned_irq)
{
        int ret;

        ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
        if (ret > 0) {
                ret = ioctl(kvm->vm_fd, KVM_ASSIGN_DEV_IRQ, assigned_irq);
                if (ret < 0)
                        return -errno;
                return ret;
        }

        return kvm_old_assign_irq(kvm, assigned_irq);
}

int kvm_deassign_irq(kvm_context_t kvm,
                     struct kvm_assigned_irq *assigned_irq)
{
        int ret;

        ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
        if (ret < 0)
                return -errno;

        return ret;
}
#else
int kvm_assign_irq(kvm_context_t kvm,
                   struct kvm_assigned_irq *assigned_irq)
{
        return kvm_old_assign_irq(kvm, assigned_irq);
}
#endif
#endif
#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
int kvm_deassign_pci_device(kvm_context_t kvm,
                            struct kvm_assigned_pci_dev *assigned_dev)
{
        int ret;

        ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
        if (ret < 0)
                return -errno;

        return ret;
}
#endif
int kvm_destroy_memory_region_works(kvm_context_t kvm)
{
        int ret = 0;

#ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
        ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
                    KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
        if (ret <= 0)
                ret = 0;
#endif
        return ret;
}
int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
{
#ifdef KVM_CAP_REINJECT_CONTROL
        int r;
        struct kvm_reinject_control control;

        control.pit_reinject = pit_reinject;

        r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
        if (r > 0) {
                r = ioctl(kvm->vm_fd, KVM_REINJECT_CONTROL, &control);
                if (r == -1)
                        return -errno;
                return r;
        }
#endif
        return -ENOSYS;
}
int kvm_has_gsi_routing(kvm_context_t kvm)
{
        int r = 0;

#ifdef KVM_CAP_IRQ_ROUTING
        r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
#endif
        return r;
}

int kvm_get_gsi_count(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
        return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
#else
        return -EINVAL;
#endif
}

int kvm_clear_gsi_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
        kvm->irq_routes->nr = 0;
        return 0;
#else
        return -EINVAL;
#endif
}
int kvm_add_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
        struct kvm_irq_routing *z;
        struct kvm_irq_routing_entry *new;
        int n, size;

        if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
                n = kvm->nr_allocated_irq_routes * 2;
                if (n < 64)
                        n = 64;
                size = sizeof(struct kvm_irq_routing);
                size += n * sizeof(*new);
                z = realloc(kvm->irq_routes, size);
                if (!z)
                        return -ENOMEM;
                kvm->nr_allocated_irq_routes = n;
                kvm->irq_routes = z;
        }
        n = kvm->irq_routes->nr++;
        new = &kvm->irq_routes->entries[n];
        memset(new, 0, sizeof(*new));
        new->gsi = entry->gsi;
        new->type = entry->type;
        new->flags = entry->flags;
        new->u = entry->u;

        set_gsi(kvm, entry->gsi);

        return 0;
#else
        return -ENOSYS;
#endif
}
int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
        struct kvm_irq_routing_entry e;

        e.gsi = gsi;
        e.type = KVM_IRQ_ROUTING_IRQCHIP;
        e.flags = 0;
        e.u.irqchip.irqchip = irqchip;
        e.u.irqchip.pin = pin;
        return kvm_add_routing_entry(kvm, &e);
#else
        return -ENOSYS;
#endif
}
int kvm_del_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
        struct kvm_irq_routing_entry *e, *p;
        int i, gsi, found = 0;

        gsi = entry->gsi;

        for (i = 0; i < kvm->irq_routes->nr; ++i) {
                e = &kvm->irq_routes->entries[i];
                if (e->type == entry->type
                    && e->gsi == gsi) {
                        switch (e->type) {
                        case KVM_IRQ_ROUTING_IRQCHIP: {
                                if (e->u.irqchip.irqchip ==
                                    entry->u.irqchip.irqchip
                                    && e->u.irqchip.pin ==
                                    entry->u.irqchip.pin) {
                                        p = &kvm->irq_routes->
                                            entries[--kvm->irq_routes->nr];
                                        *e = *p;
                                        found = 1;
                                }
                                break;
                        }
                        case KVM_IRQ_ROUTING_MSI: {
                                if (e->u.msi.address_lo ==
                                    entry->u.msi.address_lo
                                    && e->u.msi.address_hi ==
                                    entry->u.msi.address_hi
                                    && e->u.msi.data == entry->u.msi.data) {
                                        p = &kvm->irq_routes->
                                            entries[--kvm->irq_routes->nr];
                                        *e = *p;
                                        found = 1;
                                }
                                break;
                        }
                        default:
                                break;
                        }
                        if (found) {
                                /* If there are no other users of this GSI
                                 * mark it available in the bitmap */
                                for (i = 0; i < kvm->irq_routes->nr; i++) {
                                        e = &kvm->irq_routes->entries[i];
                                        if (e->gsi == gsi)
                                                break;
                                }
                                if (i == kvm->irq_routes->nr)
                                        clear_gsi(kvm, gsi);

                                return 0;
                        }
                }
        }

        return -ESRCH;
#else
        return -ENOSYS;
#endif
}
int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
        struct kvm_irq_routing_entry e;

        e.gsi = gsi;
        e.type = KVM_IRQ_ROUTING_IRQCHIP;
        e.flags = 0;
        e.u.irqchip.irqchip = irqchip;
        e.u.irqchip.pin = pin;
        return kvm_del_routing_entry(kvm, &e);
#else
        return -ENOSYS;
#endif
}
int kvm_commit_irq_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
        int r;

        kvm->irq_routes->flags = 0;
        r = ioctl(kvm->vm_fd, KVM_SET_GSI_ROUTING, kvm->irq_routes);
        if (r == -1)
                r = -errno;
        return r;
#else
        return -ENOSYS;
#endif
}
int kvm_get_irq_route_gsi(kvm_context_t kvm)
{
        int i, bit;
        uint32_t *buf = kvm->used_gsi_bitmap;

        /* Return the lowest unused GSI in the bitmap */
        for (i = 0; i < kvm->max_gsi / 32; i++) {
                bit = ffs(~buf[i]);
                if (!bit)
                        continue;

                return bit - 1 + i * 32;
        }

        return -ENOSPC;
}
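/*
 * Illustrative sketch (the IOAPIC pin is an assumption): allocate a free
 * GSI, route it to pin 10 of the kernel IOAPIC, then push the table to
 * the kernel.
 *
 *      int gsi = kvm_get_irq_route_gsi(kvm);
 *
 *      if (gsi >= 0 &&
 *          kvm_add_irq_route(kvm, gsi, KVM_IRQCHIP_IOAPIC, 10) == 0)
 *              kvm_commit_irq_routes(kvm);
 */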
#ifdef KVM_CAP_DEVICE_MSIX
int kvm_assign_set_msix_nr(kvm_context_t kvm,
                           struct kvm_assigned_msix_nr *msix_nr)
{
        int ret;

        ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
        if (ret < 0)
                return -errno;

        return ret;
}

int kvm_assign_set_msix_entry(kvm_context_t kvm,
                              struct kvm_assigned_msix_entry *entry)
{
        int ret;

        ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
        if (ret < 0)
                return -errno;

        return ret;
}
#endif