/*
 * Kernel-based Virtual Machine control library
 *
 * This library provides an API to control the kvm hardware virtualization
 * module.
 *
 * Copyright (C) 2006 Qumranet
 *
 * Authors:
 *
 *  Avi Kivity   <avi@qumranet.com>
 *  Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the GNU LGPL license, version 2.
 */

#ifndef __user
#define __user /* temporary, until installed via make headers_install */
#endif
#include <linux/kvm.h>

#define EXPECTED_KVM_API_VERSION 12

#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
#error libkvm: userspace and kernel version mismatch
#endif

#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <inttypes.h>
#include "libkvm-all.h"
#include "libkvm.h"
//#define DEBUG_MEMREG
#ifdef DEBUG_MEMREG
#define DPRINTF(fmt, args...) \
    do { fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ##args); } while (0)
#else
#define DPRINTF(fmt, args...) do {} while (0)
#endif

#define MIN(x,y) ((x) < (y) ? (x) : (y))
#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))

int kvm_abi = EXPECTED_KVM_API_VERSION;
int kvm_page_size;
static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
{
    uint32_t *bitmap = kvm->used_gsi_bitmap;

    if (gsi < kvm->max_gsi)
        bitmap[gsi / 32] |= 1U << (gsi % 32);
    else
        DPRINTF("Invalid GSI %u\n", gsi);
}

static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
{
    uint32_t *bitmap = kvm->used_gsi_bitmap;

    if (gsi < kvm->max_gsi)
        bitmap[gsi / 32] &= ~(1U << (gsi % 32));
    else
        DPRINTF("Invalid GSI %u\n", gsi);
}
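
/*
 * Illustrative note (added commentary, not from the original source):
 * each 32-bit word of used_gsi_bitmap tracks 32 GSIs, so e.g. GSI 35
 * lands in word 35 / 32 == 1 at bit 35 % 32 == 3:
 *
 *    set_gsi(kvm, 35);      // bitmap[1] |= 1U << 3
 *    clear_gsi(kvm, 35);    // bitmap[1] &= ~(1U << 3)
 */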
struct slot_info {
    unsigned long phys_addr;
    unsigned long len;
    unsigned long userspace_addr;
    unsigned flags;
    int logging_count;
};

struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];

static void init_slots(void)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        slots[i].len = 0;
}
static int get_free_slot(kvm_context_t kvm)
{
    int i;
    int tss_ext;

#if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
    tss_ext = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
#else
    tss_ext = 0;
#endif

    /*
     * On older kernels where the set tss ioctl is not supported we must
     * reserve slot 0 to hold the extended memory, as the vmx will use the
     * last 3 pages of this slot.
     */
    if (tss_ext > 0)
        i = 0;
    else
        i = 1;

    for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        if (!slots[i].len)
            return i;
    return -1;
}
static void register_slot(int slot, unsigned long phys_addr, unsigned long len,
                          unsigned long userspace_addr, unsigned flags)
{
    slots[slot].phys_addr = phys_addr;
    slots[slot].len = len;
    slots[slot].userspace_addr = userspace_addr;
    slots[slot].flags = flags;
}

static void free_slot(int slot)
{
    slots[slot].len = 0;
    slots[slot].logging_count = 0;
}

static int get_slot(unsigned long phys_addr)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
        if (slots[i].len && slots[i].phys_addr <= phys_addr &&
            (slots[i].phys_addr + slots[i].len - 1) >= phys_addr)
            return i;
    }
    return -1;
}
/* Returns -1 if this range is not totally contained in any slot,
 * and the number of the containing slot otherwise */
static int get_container_slot(uint64_t phys_addr, unsigned long size)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        if (slots[i].len && slots[i].phys_addr <= phys_addr &&
            (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
            return i;
    return -1;
}

int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size)
{
    int slot = get_container_slot(phys_addr, size);
    if (slot == -1)
        return 0;
    return 1;
}
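
/*
 * Illustrative note (added commentary, assumed example values): with a
 * slot covering [0x100000, 0x200000), get_slot() asks which slot holds a
 * single address, while get_container_slot() requires the whole
 * [addr, addr + size) range to fit:
 *
 *    get_slot(0x180000);                      // finds that slot
 *    get_container_slot(0x180000, 0x100000);  // -1, range spills past the end
 */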
/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(kvm_context_t kvm,
                                      unsigned long phys_addr,
                                      unsigned flags,
                                      unsigned mask)
{
    int r = -1;
    int slot = get_slot(phys_addr);

    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
        return 1;
    }

    flags = (slots[slot].flags & ~mask) | flags;
    if (flags == slots[slot].flags)
        return 0;
    slots[slot].flags = flags;

    {
        struct kvm_userspace_memory_region mem = {
            .slot = slot,
            .memory_size = slots[slot].len,
            .guest_phys_addr = slots[slot].phys_addr,
            .userspace_addr = slots[slot].userspace_addr,
            .flags = slots[slot].flags,
        };

        DPRINTF("slot %d start %llx len %llx flags %x\n",
                mem.slot,
                mem.guest_phys_addr,
                mem.memory_size,
                mem.flags);
        r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
        if (r == -1)
            fprintf(stderr, "%s: %m\n", __FUNCTION__);
    }
    return r;
}
static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
                                          int (*change)(kvm_context_t kvm,
                                                        uint64_t start,
                                                        uint64_t len))
{
    int i, r;

    for (i = r = 0; i < KVM_MAX_NUM_MEM_REGIONS && r == 0; i++) {
        if (slots[i].len)
            r = change(kvm, slots[i].phys_addr, slots[i].len);
    }
    return r;
}
int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm,
                                    uint64_t phys_addr,
                                    uint64_t len)
{
    int slot = get_slot(phys_addr);

    DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len);
    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return -EINVAL;
    }

    if (slots[slot].logging_count++)
        return 0;

    return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm,
                                     uint64_t phys_addr,
                                     uint64_t len)
{
    int slot = get_slot(phys_addr);

    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return -EINVAL;
    }

    if (--slots[slot].logging_count)
        return 0;

    return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}
/*
 * Enable dirty page logging for all memory regions
 */
int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
{
    if (kvm->dirty_pages_log_all)
        return 0;
    kvm->dirty_pages_log_all = 1;
    return kvm_dirty_pages_log_change_all(kvm,
                                          kvm_dirty_pages_log_enable_slot);
}

/*
 * Enable dirty page logging only for memory regions that were created with
 * dirty logging enabled (disable for all other memory regions).
 */
int kvm_dirty_pages_log_reset(kvm_context_t kvm)
{
    if (!kvm->dirty_pages_log_all)
        return 0;
    kvm->dirty_pages_log_all = 0;
    return kvm_dirty_pages_log_change_all(kvm,
                                          kvm_dirty_pages_log_disable_slot);
}
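
/*
 * Usage sketch (illustrative, not part of the library; phys_addr, bitmap
 * and still_too_dirty() are assumed caller-side names): a live-migration
 * style loop enables logging, then repeatedly harvests the per-slot
 * bitmap until the dirty set is small enough:
 *
 *    kvm_dirty_pages_log_enable_all(kvm);
 *    do {
 *        kvm_get_dirty_pages(kvm, phys_addr, bitmap);
 *        // ... transfer pages whose bits are set ...
 *    } while (still_too_dirty(bitmap));
 *    kvm_dirty_pages_log_reset(kvm);
 */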
kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
                       void *opaque)
{
    int fd;
    kvm_context_t kvm;
    int r, gsi_count;

    fd = open("/dev/kvm", O_RDWR);
    if (fd == -1) {
        perror("open /dev/kvm");
        return NULL;
    }
    r = ioctl(fd, KVM_GET_API_VERSION, 0);
    if (r == -1) {
        fprintf(stderr, "kvm kernel version too old: "
                "KVM_GET_API_VERSION ioctl not supported\n");
        goto out_close;
    }
    if (r < EXPECTED_KVM_API_VERSION) {
        fprintf(stderr, "kvm kernel version too old: "
                "We expect API version %d or newer, but got "
                "version %d\n",
                EXPECTED_KVM_API_VERSION, r);
        goto out_close;
    }
    if (r > EXPECTED_KVM_API_VERSION) {
        fprintf(stderr, "kvm userspace version too old\n");
        goto out_close;
    }
    kvm_abi = r;
    kvm_page_size = getpagesize();
    kvm = malloc(sizeof(*kvm));
    if (kvm == NULL)
        goto out_close;
    memset(kvm, 0, sizeof(*kvm));
    kvm->fd = fd;
    kvm->vm_fd = -1;
    kvm->callbacks = callbacks;
    kvm->opaque = opaque;
    kvm->dirty_pages_log_all = 0;
    kvm->no_irqchip_creation = 0;
    kvm->no_pit_creation = 0;

    gsi_count = kvm_get_gsi_count(kvm);
    if (gsi_count > 0) {
        int gsi_bits, i;

        /* Round up so we can search ints using ffs */
        gsi_bits = ALIGN(gsi_count, 32);
        kvm->used_gsi_bitmap = malloc(gsi_bits / 8);
        if (!kvm->used_gsi_bitmap)
            goto out_close;
        memset(kvm->used_gsi_bitmap, 0, gsi_bits / 8);
        kvm->max_gsi = gsi_bits;

        /* Mark any over-allocated bits as already in use */
        for (i = gsi_count; i < gsi_bits; i++)
            set_gsi(kvm, i);
    }

    return kvm;

out_close:
    close(fd);
    return NULL;
}
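
/*
 * Usage sketch (illustrative; my_callbacks, my_state, ram_size and
 * ram_ptr are assumed caller-side names): typical bring-up pairs
 * kvm_init() with kvm_create() and kvm_create_vcpu():
 *
 *    kvm_context_t kvm = kvm_init(&my_callbacks, my_state);
 *    if (!kvm)
 *        exit(1);
 *    if (kvm_create(kvm, ram_size, &ram_ptr) < 0 ||
 *        kvm_create_vcpu(kvm, 0) < 0)
 *        exit(1);
 */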
void kvm_finalize(kvm_context_t kvm)
{
    if (kvm->vcpu_fd[0] != -1)
        close(kvm->vcpu_fd[0]);
    if (kvm->vm_fd != -1)
        close(kvm->vm_fd);
    close(kvm->fd);
    free(kvm);
}

void kvm_disable_irqchip_creation(kvm_context_t kvm)
{
    kvm->no_irqchip_creation = 1;
}

void kvm_disable_pit_creation(kvm_context_t kvm)
{
    kvm->no_pit_creation = 1;
}
int kvm_create_vcpu(kvm_context_t kvm, int slot)
{
    long mmap_size;
    int r;

    r = ioctl(kvm->vm_fd, KVM_CREATE_VCPU, slot);
    if (r == -1) {
        r = -errno;
        fprintf(stderr, "kvm_create_vcpu: %m\n");
        return r;
    }
    kvm->vcpu_fd[slot] = r;
    mmap_size = ioctl(kvm->fd, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size == -1) {
        r = -errno;
        fprintf(stderr, "get vcpu mmap size: %m\n");
        return r;
    }
    kvm->run[slot] = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                          kvm->vcpu_fd[slot], 0);
    if (kvm->run[slot] == MAP_FAILED) {
        r = -errno;
        fprintf(stderr, "mmap vcpu area: %m\n");
        return r;
    }
    return 0;
}
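
/*
 * Note (added commentary): the mmap above shares the kernel's struct
 * kvm_run for this vcpu, so exit state can be inspected without an extra
 * ioctl, e.g.:
 *
 *    struct kvm_run *run = kvm->run[slot];
 *    if (run->exit_reason == KVM_EXIT_IO)
 *        ;  // I/O data sits at (void *)run + run->io.data_offset
 */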
int kvm_create_vm(kvm_context_t kvm)
{
    int fd = kvm->fd;

#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes = malloc(sizeof(*kvm->irq_routes));
    if (!kvm->irq_routes)
        return -ENOMEM;
    memset(kvm->irq_routes, 0, sizeof(*kvm->irq_routes));
    kvm->nr_allocated_irq_routes = 0;
#endif

    kvm->vcpu_fd[0] = -1;

    fd = ioctl(fd, KVM_CREATE_VM, 0);
    if (fd == -1) {
        fprintf(stderr, "kvm_create_vm: %m\n");
        return -1;
    }
    kvm->vm_fd = fd;
    return 0;
}
static int kvm_create_default_phys_mem(kvm_context_t kvm,
                                       unsigned long phys_mem_bytes,
                                       void **vm_mem)
{
#ifdef KVM_CAP_USER_MEMORY
    int r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
    if (r > 0)
        return 0;
    fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
#else
#error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
#endif
    return -1;
}
int kvm_check_extension(kvm_context_t kvm, int ext)
{
    int ret;

    ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, ext);
    if (ret > 0)
        return ret;
    return 0;
}
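
/*
 * Usage sketch (illustrative): capability checks gate optional ioctls,
 * for example:
 *
 *    if (kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING))
 *        kvm_add_irq_route(kvm, gsi, KVM_IRQCHIP_IOAPIC, pin);
 */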
void kvm_create_irqchip(kvm_context_t kvm)
{
    int r;

    kvm->irqchip_in_kernel = 0;
#ifdef KVM_CAP_IRQCHIP
    if (!kvm->no_irqchip_creation) {
        r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
        if (r > 0) { /* kernel irqchip supported */
            r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
            if (r >= 0) {
                kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
#if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
                r = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
                          KVM_CAP_IRQ_INJECT_STATUS);
                if (r > 0)
                    kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
#endif
                kvm->irqchip_in_kernel = 1;
            } else
                fprintf(stderr, "Create kernel PIC irqchip failed\n");
        }
    }
#endif
}
int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
{
    int r;

    r = kvm_create_vm(kvm);
    if (r < 0)
        return r;
    r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
    if (r < 0)
        return r;
    init_slots();
    r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
    if (r < 0)
        return r;
    kvm_create_irqchip(kvm);

    return 0;
}
void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len, int log, int writable)
{
    int r;
    int prot = PROT_READ;
    void *ptr;
    struct kvm_userspace_memory_region memory = {
        .memory_size = len,
        .guest_phys_addr = phys_start,
        .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
    };

    if (writable)
        prot |= PROT_WRITE;

#if !defined(__s390__)
    ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
#else
    ptr = mmap(LIBKVM_S390_ORIGIN, len, prot | PROT_EXEC,
               MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
#endif
    if (ptr == MAP_FAILED) {
        fprintf(stderr, "%s: %s\n", __func__, strerror(errno));
        return NULL;
    }

    memset(ptr, 0, len);

    memory.userspace_addr = (unsigned long)ptr;
    memory.slot = get_free_slot(kvm);
    DPRINTF("slot %d start %llx len %llx flags %x\n",
            memory.slot,
            memory.guest_phys_addr,
            memory.memory_size,
            memory.flags);
    r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r == -1) {
        fprintf(stderr, "%s: %s\n", __func__, strerror(errno));
        return NULL;
    }
    register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
                  memory.userspace_addr, memory.flags);

    return ptr;
}
int kvm_register_phys_mem(kvm_context_t kvm,
                          unsigned long phys_start, void *userspace_addr,
                          unsigned long len, int log)
{
    struct kvm_userspace_memory_region memory = {
        .memory_size = len,
        .guest_phys_addr = phys_start,
        .userspace_addr = (unsigned long)(intptr_t)userspace_addr,
        .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
    };
    int r;

    memory.slot = get_free_slot(kvm);
    DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %x\n",
            memory.guest_phys_addr, memory.memory_size,
            memory.userspace_addr, memory.slot, memory.flags);
    r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r == -1) {
        fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(errno));
        return -1;
    }
    register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
                  memory.userspace_addr, memory.flags);
    return 0;
}
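
/*
 * Usage sketch (illustrative; ram and ram_size are assumed caller-side
 * names): guest RAM below 4GB can be allocated as one chunk and
 * registered as a single slot starting at guest physical address 0:
 *
 *    void *ram = mmap(NULL, ram_size, PROT_READ | PROT_WRITE,
 *                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 *    if (ram != MAP_FAILED)
 *        kvm_register_phys_mem(kvm, 0, ram, ram_size, 0);
 */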
/* destroy/free a whole slot.
 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
 */
void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len)
{
    int slot;
    int r;
    struct kvm_userspace_memory_region memory = {
        .memory_size = 0,
        .guest_phys_addr = phys_start,
        .userspace_addr = 0,
        .flags = 0,
    };

    slot = get_slot(phys_start);

    if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
        fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
                __FUNCTION__, slot);
        return;
    }
    if (phys_start != slots[slot].phys_addr) {
        fprintf(stderr,
                "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
                __FUNCTION__, phys_start, slots[slot].phys_addr);
        phys_start = slots[slot].phys_addr;
    }

    memory.slot = slot;
    DPRINTF("slot %d start %llx len %llx flags %x\n",
            memory.slot,
            memory.guest_phys_addr,
            memory.memory_size,
            memory.flags);
    r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r == -1) {
        fprintf(stderr, "destroy_userspace_phys_mem: %s\n",
                strerror(errno));
        return;
    }

    free_slot(memory.slot);
}
void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size)
{
    int slot = get_container_slot(phys_addr, size);

    if (slot != -1) {
        DPRINTF("Unregistering memory region %llx (%lx)\n",
                (unsigned long long)phys_addr, size);
        kvm_destroy_phys_mem(kvm, phys_addr, size);
        return;
    }
}
static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
{
    int r;
    struct kvm_dirty_log log = {
        .slot = slot,
    };

    log.dirty_bitmap = buf;

    r = ioctl(kvm->vm_fd, ioctl_num, &log);
    if (r == -1)
        return -errno;
    return 0;
}
int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
{
    int slot;

    slot = get_slot(phys_addr);
    return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
}
int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
                              unsigned long len, void *buf, void *opaque,
                              int (*cb)(unsigned long start, unsigned long len,
                                        void *bitmap, void *opaque))
{
    int i;
    int r;
    unsigned long end_addr = phys_addr + len;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
        if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr)
            && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) {
            r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
            if (r)
                return r;
            r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
            if (r)
                return r;
        }
    }
    return 0;
}
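
/*
 * Usage sketch (illustrative; print_range is an assumed helper, not part
 * of the library): the callback receives one (start, len, bitmap) triple
 * per slot that falls inside the requested range:
 *
 *    static int print_range(unsigned long start, unsigned long len,
 *                           void *bitmap, void *opaque)
 *    {
 *        fprintf(stderr, "slot at %lx (+%lx)\n", start, len);
 *        return 0;  // non-zero aborts the walk
 *    }
 *
 *    kvm_get_dirty_pages_range(kvm, 0, ram_size, buf, NULL, print_range);
 */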
#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
{
    struct kvm_irq_level event;
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    event.level = level;
    event.irq = irq;
    r = ioctl(kvm->vm_fd, kvm->irqchip_inject_ioctl, &event);
    if (r == -1)
        perror("kvm_set_irq_level");

    if (status) {
#ifdef KVM_CAP_IRQ_INJECT_STATUS
        *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ?
            1 : event.status;
#else
        *status = 1;
#endif
    }

    return 1;
}
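
/*
 * Usage sketch (illustrative): edge-triggered injection raises and then
 * lowers the line; status (where the kernel supports it) reports whether
 * the interrupt was actually delivered:
 *
 *    int status;
 *    kvm_set_irq_level(kvm, irq, 1, &status);
 *    kvm_set_irq_level(kvm, irq, 0, &status);
 */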
int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    r = ioctl(kvm->vm_fd, KVM_GET_IRQCHIP, chip);
    if (r == -1) {
        r = -errno;
        perror("kvm_get_irqchip");
    }
    return r;
}

int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    r = ioctl(kvm->vm_fd, KVM_SET_IRQCHIP, chip);
    if (r == -1) {
        r = -errno;
        perror("kvm_set_irqchip");
    }
    return r;
}

#endif
static int handle_io(kvm_context_t kvm, struct kvm_run *run, int vcpu)
{
    uint16_t addr = run->io.port;
    int r;
    int i;
    void *p = (void *)run + run->io.data_offset;

    for (i = 0; i < run->io.count; ++i) {
        switch (run->io.direction) {
        case KVM_EXIT_IO_IN:
            switch (run->io.size) {
            case 1:
                r = kvm->callbacks->inb(kvm->opaque, addr, p);
                break;
            case 2:
                r = kvm->callbacks->inw(kvm->opaque, addr, p);
                break;
            case 4:
                r = kvm->callbacks->inl(kvm->opaque, addr, p);
                break;
            default:
                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                return -EMSGSIZE;
            }
            break;
        case KVM_EXIT_IO_OUT:
            switch (run->io.size) {
            case 1:
                r = kvm->callbacks->outb(kvm->opaque, addr,
                                         *(uint8_t *)p);
                break;
            case 2:
                r = kvm->callbacks->outw(kvm->opaque, addr,
                                         *(uint16_t *)p);
                break;
            case 4:
                r = kvm->callbacks->outl(kvm->opaque, addr,
                                         *(uint32_t *)p);
                break;
            default:
                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                return -EMSGSIZE;
            }
            break;
        default:
            fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
            return -EPROTO;
        }

        p += run->io.size;
    }

    return 0;
}
int handle_debug(kvm_context_t kvm, int vcpu, void *env)
{
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_run *run = kvm->run[vcpu];

    return kvm->callbacks->debug(kvm->opaque, env, &run->debug.arch);
#else
    return 0;
#endif
}
int kvm_get_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
{
    return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_REGS, regs);
}

int kvm_set_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
{
    return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_REGS, regs);
}

int kvm_get_fpu(kvm_context_t kvm, int vcpu, struct kvm_fpu *fpu)
{
    return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_FPU, fpu);
}

int kvm_set_fpu(kvm_context_t kvm, int vcpu, struct kvm_fpu *fpu)
{
    return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_FPU, fpu);
}

int kvm_get_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
{
    return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_SREGS, sregs);
}

int kvm_set_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
{
    return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SREGS, sregs);
}

#ifdef KVM_CAP_MP_STATE
int kvm_get_mpstate(kvm_context_t kvm, int vcpu, struct kvm_mp_state *mp_state)
{
    int r;

    r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
    if (r > 0)
        return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_MP_STATE, mp_state);
    return -ENOSYS;
}

int kvm_set_mpstate(kvm_context_t kvm, int vcpu, struct kvm_mp_state *mp_state)
{
    int r;

    r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
    if (r > 0)
        return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_MP_STATE, mp_state);
    return -ENOSYS;
}
#endif
static int handle_mmio(kvm_context_t kvm, struct kvm_run *kvm_run)
{
    unsigned long addr = kvm_run->mmio.phys_addr;
    void *data = kvm_run->mmio.data;

    /* hack: Red Hat 7.1 generates these weird accesses. */
    if ((addr > 0xa0000 - 4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
        return 0;

    if (kvm_run->mmio.is_write)
        return kvm->callbacks->mmio_write(kvm->opaque, addr, data,
                                          kvm_run->mmio.len);
    else
        return kvm->callbacks->mmio_read(kvm->opaque, addr, data,
                                         kvm_run->mmio.len);
}

int handle_io_window(kvm_context_t kvm)
{
    return kvm->callbacks->io_window(kvm->opaque);
}

int handle_halt(kvm_context_t kvm, int vcpu)
{
    return kvm->callbacks->halt(kvm->opaque, vcpu);
}

int handle_shutdown(kvm_context_t kvm, void *env)
{
    return kvm->callbacks->shutdown(kvm->opaque, env);
}

int try_push_interrupts(kvm_context_t kvm)
{
    return kvm->callbacks->try_push_interrupts(kvm->opaque);
}

static inline void push_nmi(kvm_context_t kvm)
{
#ifdef KVM_CAP_USER_NMI
    kvm->callbacks->push_nmi(kvm->opaque);
#endif /* KVM_CAP_USER_NMI */
}

void post_kvm_run(kvm_context_t kvm, void *env)
{
    kvm->callbacks->post_kvm_run(kvm->opaque, env);
}

int pre_kvm_run(kvm_context_t kvm, void *env)
{
    return kvm->callbacks->pre_kvm_run(kvm->opaque, env);
}

int kvm_get_interrupt_flag(kvm_context_t kvm, int vcpu)
{
    struct kvm_run *run = kvm->run[vcpu];

    return run->if_flag;
}

int kvm_is_ready_for_interrupt_injection(kvm_context_t kvm, int vcpu)
{
    struct kvm_run *run = kvm->run[vcpu];

    return run->ready_for_interrupt_injection;
}
int kvm_run(kvm_context_t kvm, int vcpu, void *env)
{
    int r;
    int fd = kvm->vcpu_fd[vcpu];
    struct kvm_run *run = kvm->run[vcpu];

again:
    push_nmi(kvm);
#if !defined(__s390__)
    if (!kvm->irqchip_in_kernel)
        run->request_interrupt_window = try_push_interrupts(kvm);
#endif
    r = pre_kvm_run(kvm, env);
    if (r)
        return r;
    r = ioctl(fd, KVM_RUN, 0);

    if (r == -1 && errno != EINTR && errno != EAGAIN) {
        r = -errno;
        post_kvm_run(kvm, env);
        fprintf(stderr, "kvm_run: %s\n", strerror(-r));
        return r;
    }

    post_kvm_run(kvm, env);

#if defined(KVM_CAP_COALESCED_MMIO)
    if (kvm->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring = (void *)run +
            kvm->coalesced_mmio * PAGE_SIZE;
        while (ring->first != ring->last) {
            kvm->callbacks->mmio_write(kvm->opaque,
                                       ring->coalesced_mmio[ring->first].phys_addr,
                                       &ring->coalesced_mmio[ring->first].data[0],
                                       ring->coalesced_mmio[ring->first].len);
            smp_wmb();
            ring->first = (ring->first + 1) %
                KVM_COALESCED_MMIO_MAX;
        }
    }
#endif

#if !defined(__s390__)
    if (r == -1) {
        r = handle_io_window(kvm);
        goto more;
    }
#endif
    switch (run->exit_reason) {
    case KVM_EXIT_UNKNOWN:
        fprintf(stderr, "unhandled vm exit: 0x%x vcpu_id %d\n",
                (unsigned)run->hw.hardware_exit_reason, vcpu);
        kvm_show_regs(kvm, vcpu);
        abort();
        break;
    case KVM_EXIT_FAIL_ENTRY:
        fprintf(stderr, "kvm_run: failed entry, reason %u\n",
                (unsigned)run->fail_entry.hardware_entry_failure_reason & 0xffff);
        kvm_show_regs(kvm, vcpu);
        return -ENOEXEC;
    case KVM_EXIT_EXCEPTION:
        fprintf(stderr, "exception %d (%x)\n",
                run->ex.exception,
                run->ex.error_code);
        kvm_show_regs(kvm, vcpu);
        kvm_show_code(kvm, vcpu);
        abort();
        break;
    case KVM_EXIT_IO:
        r = handle_io(kvm, run, vcpu);
        break;
    case KVM_EXIT_DEBUG:
        r = handle_debug(kvm, vcpu, env);
        break;
    case KVM_EXIT_MMIO:
        r = handle_mmio(kvm, run);
        break;
    case KVM_EXIT_HLT:
        r = handle_halt(kvm, vcpu);
        break;
    case KVM_EXIT_IRQ_WINDOW_OPEN:
        break;
    case KVM_EXIT_SHUTDOWN:
        r = handle_shutdown(kvm, env);
        break;
#if defined(__s390__)
    case KVM_EXIT_S390_SIEIC:
        r = kvm->callbacks->s390_handle_intercept(kvm, vcpu, run);
        break;
    case KVM_EXIT_S390_RESET:
        r = kvm->callbacks->s390_handle_reset(kvm, vcpu, run);
        break;
#endif
    default:
        if (kvm_arch_run(run, kvm, vcpu)) {
            fprintf(stderr, "unhandled vm exit: 0x%x\n",
                    run->exit_reason);
            kvm_show_regs(kvm, vcpu);
            abort();
        }
        break;
    }

more:
    if (!r)
        goto again;
    return r;
}
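
/*
 * Usage sketch (illustrative; vm_running is an assumed caller-side
 * flag): a vcpu thread normally just re-enters kvm_run(), which loops
 * internally on clean exits and only returns when a handler or callback
 * reports a non-zero result:
 *
 *    while (vm_running) {
 *        int r = kvm_run(kvm, vcpu, env);
 *        if (r < 0)
 *            break;  // handler reported an error
 *    }
 */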
int kvm_inject_irq(kvm_context_t kvm, int vcpu, unsigned irq)
{
    struct kvm_interrupt intr;

    intr.irq = irq;
    return ioctl(kvm->vcpu_fd[vcpu], KVM_INTERRUPT, &intr);
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
int kvm_set_guest_debug(kvm_context_t kvm, int vcpu, struct kvm_guest_debug *dbg)
{
    return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_GUEST_DEBUG, dbg);
}
#endif
int kvm_set_signal_mask(kvm_context_t kvm, int vcpu, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset) {
        r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SIGNAL_MASK, NULL);
        if (r == -1)
            r = -errno;
        return r;
    }
    sigmask = malloc(sizeof(*sigmask) + sizeof(*sigset));
    if (!sigmask)
        return -ENOMEM;

    sigmask->len = 8;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SIGNAL_MASK, sigmask);
    if (r == -1)
        r = -errno;
    free(sigmask);
    return r;
}
int kvm_irqchip_in_kernel(kvm_context_t kvm)
{
    return kvm->irqchip_in_kernel;
}

int kvm_pit_in_kernel(kvm_context_t kvm)
{
    return kvm->pit_in_kernel;
}

int kvm_has_sync_mmu(kvm_context_t kvm)
{
    int r = 0;
#ifdef KVM_CAP_SYNC_MMU
    r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
#endif
    return r;
}

int kvm_inject_nmi(kvm_context_t kvm, int vcpu)
{
#ifdef KVM_CAP_USER_NMI
    return ioctl(kvm->vcpu_fd[vcpu], KVM_NMI);
#else
    return -ENOSYS;
#endif
}
int kvm_init_coalesced_mmio(kvm_context_t kvm)
{
    int r = 0;

    kvm->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
    if (r > 0) {
        kvm->coalesced_mmio = r;
        return 0;
    }
#endif
    return r;
}
int kvm_register_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_zone zone;
    int r;

    if (kvm->coalesced_mmio) {
        zone.addr = addr;
        zone.size = size;

        r = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
        if (r == -1) {
            perror("kvm_register_coalesced_mmio_zone");
            return -errno;
        }
        return 0;
    }
#endif
    return -ENOSYS;
}

int kvm_unregister_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_zone zone;
    int r;

    if (kvm->coalesced_mmio) {
        zone.addr = addr;
        zone.size = size;

        r = ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone);
        if (r == -1) {
            perror("kvm_unregister_coalesced_mmio_zone");
            return -errno;
        }
        DPRINTF("Unregistered coalesced mmio region for %llx (%x)\n",
                (unsigned long long)addr, size);
        return 0;
    }
#endif
    return -ENOSYS;
}
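
/*
 * Usage sketch (illustrative): a device with write-heavy MMIO, such as a
 * VGA framebuffer at 0xa0000, can batch exits by registering its region;
 * queued writes are drained by the ring loop in kvm_run() above:
 *
 *    kvm_register_coalesced_mmio(kvm, 0xa0000, 0x20000);
 *    // ... run the guest ...
 *    kvm_unregister_coalesced_mmio(kvm, 0xa0000, 0x20000);
 */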
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
int kvm_assign_pci_device(kvm_context_t kvm,
                          struct kvm_assigned_pci_dev *assigned_dev)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
    if (ret < 0)
        return -errno;

    return ret;
}

static int kvm_old_assign_irq(kvm_context_t kvm,
                              struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq);
    if (ret < 0)
        return -errno;

    return ret;
}

#ifdef KVM_CAP_ASSIGN_DEV_IRQ
int kvm_assign_irq(kvm_context_t kvm,
                   struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
    if (ret > 0) {
        ret = ioctl(kvm->vm_fd, KVM_ASSIGN_DEV_IRQ, assigned_irq);
        if (ret < 0)
            return -errno;
        return ret;
    }

    return kvm_old_assign_irq(kvm, assigned_irq);
}

int kvm_deassign_irq(kvm_context_t kvm,
                     struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
    if (ret < 0)
        return -errno;

    return ret;
}
#else
int kvm_assign_irq(kvm_context_t kvm,
                   struct kvm_assigned_irq *assigned_irq)
{
    return kvm_old_assign_irq(kvm, assigned_irq);
}
#endif
#endif
#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
int kvm_deassign_pci_device(kvm_context_t kvm,
                            struct kvm_assigned_pci_dev *assigned_dev)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
    if (ret < 0)
        return -errno;

    return ret;
}
#endif

int kvm_destroy_memory_region_works(kvm_context_t kvm)
{
    int ret = 0;

#ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
    ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
                KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
    if (ret <= 0)
        ret = 0;
#endif
    return ret;
}
int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
{
#ifdef KVM_CAP_REINJECT_CONTROL
    int r;
    struct kvm_reinject_control control;

    control.pit_reinject = pit_reinject;

    r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
    if (r > 0) {
        r = ioctl(kvm->vm_fd, KVM_REINJECT_CONTROL, &control);
        if (r == -1)
            return -errno;
        return r;
    }
#endif
    return -ENOSYS;
}
int kvm_has_gsi_routing(kvm_context_t kvm)
{
    int r = 0;

#ifdef KVM_CAP_IRQ_ROUTING
    r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
#endif
    return r;
}

int kvm_get_gsi_count(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
#else
    return -EINVAL;
#endif
}

int kvm_clear_gsi_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes->nr = 0;
    return 0;
#else
    return -EINVAL;
#endif
}
int kvm_add_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing *z;
    struct kvm_irq_routing_entry *new;
    int n, size;

    if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
        n = kvm->nr_allocated_irq_routes * 2;
        if (n < 64)
            n = 64;
        size = sizeof(struct kvm_irq_routing);
        size += n * sizeof(*new);
        z = realloc(kvm->irq_routes, size);
        if (!z)
            return -ENOMEM;
        kvm->nr_allocated_irq_routes = n;
        kvm->irq_routes = z;
    }
    n = kvm->irq_routes->nr++;
    new = &kvm->irq_routes->entries[n];
    memset(new, 0, sizeof(*new));
    new->gsi = entry->gsi;
    new->type = entry->type;
    new->flags = entry->flags;
    new->u = entry->u;

    set_gsi(kvm, entry->gsi);

    return 0;
#else
    return -ENOSYS;
#endif
}
int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_add_routing_entry(kvm, &e);
#else
    return -ENOSYS;
#endif
}
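
/*
 * Usage sketch (illustrative): wire GSI 5 to pin 5 of the in-kernel
 * IOAPIC and push the whole table to the kernel in one go:
 *
 *    kvm_add_irq_route(kvm, 5, KVM_IRQCHIP_IOAPIC, 5);
 *    kvm_commit_irq_routes(kvm);
 */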
int kvm_del_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry *e, *p;
    int i, gsi, found = 0;

    gsi = entry->gsi;

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        e = &kvm->irq_routes->entries[i];
        if (e->type == entry->type && e->gsi == gsi) {
            switch (e->type) {
            case KVM_IRQ_ROUTING_IRQCHIP:
                if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip
                    && e->u.irqchip.pin == entry->u.irqchip.pin) {
                    p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
                    *e = *p;
                    found = 1;
                }
                break;
            case KVM_IRQ_ROUTING_MSI:
                if (e->u.msi.address_lo == entry->u.msi.address_lo
                    && e->u.msi.address_hi == entry->u.msi.address_hi
                    && e->u.msi.data == entry->u.msi.data) {
                    p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
                    *e = *p;
                    found = 1;
                }
                break;
            default:
                break;
            }
            if (found) {
                /* If there are no other users of this GSI
                 * mark it available in the bitmap */
                for (i = 0; i < kvm->irq_routes->nr; i++) {
                    e = &kvm->irq_routes->entries[i];
                    if (e->gsi == gsi)
                        break;
                }
                if (i == kvm->irq_routes->nr)
                    clear_gsi(kvm, gsi);

                return 0;
            }
        }
    }
    return -ESRCH;
#else
    return -ENOSYS;
#endif
}
int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_del_routing_entry(kvm, &e);
#else
    return -ENOSYS;
#endif
}

int kvm_commit_irq_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    int r;

    kvm->irq_routes->flags = 0;
    r = ioctl(kvm->vm_fd, KVM_SET_GSI_ROUTING, kvm->irq_routes);
    if (r == -1)
        r = -errno;
    return r;
#else
    return -ENOSYS;
#endif
}
int kvm_get_irq_route_gsi(kvm_context_t kvm)
{
    int i, bit;
    uint32_t *buf = kvm->used_gsi_bitmap;

    /* Return the lowest unused GSI in the bitmap */
    for (i = 0; i < kvm->max_gsi / 32; i++) {
        bit = ffs(~buf[i]);
        if (!bit)
            continue;

        return bit - 1 + i * 32;
    }

    return -ENOSPC;
}
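
/*
 * Usage sketch (illustrative; addr_lo, addr_hi and data are assumed
 * device-side MSI values): MSI setup typically allocates a free GSI
 * here, fills an MSI routing entry, and commits the table:
 *
 *    struct kvm_irq_routing_entry e;
 *    int gsi = kvm_get_irq_route_gsi(kvm);
 *
 *    memset(&e, 0, sizeof(e));
 *    e.gsi = gsi;
 *    e.type = KVM_IRQ_ROUTING_MSI;
 *    e.u.msi.address_lo = addr_lo;
 *    e.u.msi.address_hi = addr_hi;
 *    e.u.msi.data = data;
 *    kvm_add_routing_entry(kvm, &e);
 *    kvm_commit_irq_routes(kvm);
 */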
#ifdef KVM_CAP_DEVICE_MSIX
int kvm_assign_set_msix_nr(kvm_context_t kvm,
                           struct kvm_assigned_msix_nr *msix_nr)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
    if (ret < 0)
        return -errno;

    return ret;
}

int kvm_assign_set_msix_entry(kvm_context_t kvm,
                              struct kvm_assigned_msix_entry *entry)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
    if (ret < 0)
        return -errno;

    return ret;
}
#endif