Merge commit 'b947c12c0bb217fe09968e652873e0d22b269d68' into upstream-merge
[qemu-kvm.git] / kvm-all.c
/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu-barrier.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
#include "kvm.h"
#include "bswap.h"
/* This check must be after config-host.h is included */
#ifdef CONFIG_EVENTFD
#include <sys/eventfd.h>
#endif

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
#ifdef OBSOLETE_KVM_IMPL

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
#endif
    int broken_set_mem_region;
    int migration_log;
    int vcpu_events;
    int robust_singlestep;
    int debugregs;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
    int irqchip_in_kernel;
    int pit_in_kernel;
    int xsave, xcrs;
    int many_ioeventfds;
};

static KVMState *kvm_state;

#endif
static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}
static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}
/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}
int kvm_physical_memory_addr_from_ram(KVMState *s, ram_addr_t ram_addr,
                                      target_phys_addr_t *phys_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (ram_addr >= mem->phys_offset &&
            ram_addr < mem->phys_offset + mem->memory_size) {
            *phys_addr = mem->start_addr + (ram_addr - mem->phys_offset);
            return 1;
        }
    }

    return 0;
}
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_safe_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}
#ifdef OBSOLETE_KVM_IMPL
static void kvm_reset_vcpu(void *opaque)
{
    CPUState *env = opaque;

    kvm_arch_reset_vcpu(env);
}
#endif

int kvm_irqchip_in_kernel(void)
{
    return kvm_state->irqchip_in_kernel;
}

int kvm_pit_in_kernel(void)
{
    return kvm_state->pit_in_kernel;
}
#ifdef OBSOLETE_KVM_IMPL
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    DPRINTF("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        DPRINTF("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        ret = mmap_size;
        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        DPRINTF("mmap'ing vcpu state failed\n");
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    if (s->coalesced_mmio && !s->coalesced_mmio_ring)
        s->coalesced_mmio_ring = (void *) env->kvm_run +
                s->coalesced_mmio * PAGE_SIZE;
#endif

    ret = kvm_arch_init_vcpu(env);
    if (ret == 0) {
        qemu_register_reset(kvm_reset_vcpu, env);
        kvm_arch_reset_vcpu(env);
    }
err:
    return ret;
}
#endif
/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                (target_phys_addr_t)(phys_addr + size - 1));
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing changed effectively, no need to issue ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}
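
/*
 * A worked example of the (flags, mask) interface above: kvm_log_start()
 * passes flags = mask = KVM_MEM_LOG_DIRTY_PAGES, setting the bit in
 * mem->flags, while kvm_log_stop() passes flags = 0 with the same mask,
 * clearing it. Bits outside the mask are preserved either way.
 */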
static int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!mem->memory_size) {
            continue;
        }
        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}
/* get kvm's dirty pages bitmap and update qemu's */
static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
                                         unsigned long *bitmap,
                                         unsigned long offset,
                                         unsigned long mem_size)
{
    unsigned int i, j;
    unsigned long page_number, addr, addr1, c;
    ram_addr_t ram_addr;
    unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + HOST_LONG_BITS - 1) /
        HOST_LONG_BITS;

    /*
     * bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty.
     */
    for (i = 0; i < len; i++) {
        if (bitmap[i] != 0) {
            c = leul_to_cpu(bitmap[i]);
            do {
                j = ffsl(c) - 1;
                c &= ~(1ul << j);
                page_number = i * HOST_LONG_BITS + j;
                addr1 = page_number * TARGET_PAGE_SIZE;
                addr = offset + addr1;
                ram_addr = cpu_get_physical_page_desc(addr);
                cpu_physical_memory_set_dirty(ram_addr);
            } while (c != 0);
        }
    }
    return 0;
}
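
/*
 * Illustrative example of the bitmap-traveling math above (added note,
 * assuming 64-bit host longs and 4 KiB target pages): if bitmap[2] == 0x9,
 * the inner loop extracts bits j = 0 and j = 3, giving
 *
 *   page_number = 2 * 64 + 0 = 128  =>  addr = offset + 128 * 4096
 *   page_number = 2 * 64 + 3 = 131  =>  addr = offset + 131 * 4096
 *
 * Bitmap words that are zero are skipped entirely, which is why walking
 * the bitmap beats touching every page when little memory is dirty.
 */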
/* round x up to the next multiple of y; y must be a power of two */
#define ALIGN(x, y)  (((x)+(y)-1) & ~((y)-1))
/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using
 * cpu_physical_memory_set_dirty(), which means all the dirty bits of a
 * page reported by KVM are set at once.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                          target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

        size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), HOST_LONG_BITS) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
            DPRINTF("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

        kvm_get_dirty_pages_log_range(mem->start_addr, d.dirty_bitmap,
                                      mem->start_addr, mem->memory_size);
        start_addr = mem->start_addr + mem->memory_size;
    }
    qemu_free(d.dirty_bitmap);

    return ret;
}
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}
static int kvm_check_many_ioeventfds(void)
{
    /* Older kernels have a 6 device limit on the KVM io bus.  Find out so we
     * can avoid creating too many ioeventfds.
     */
#ifdef CONFIG_EVENTFD
    int ioeventfds[7];
    int i, ret = 0;
    for (i = 0; i < ARRAY_SIZE(ioeventfds); i++) {
        ioeventfds[i] = eventfd(0, EFD_CLOEXEC);
        if (ioeventfds[i] < 0) {
            break;
        }
        ret = kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, true);
        if (ret < 0) {
            close(ioeventfds[i]);
            break;
        }
    }

    /* Decide whether many devices are supported or not */
    ret = i == ARRAY_SIZE(ioeventfds);

    while (i-- > 0) {
        kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, false);
        close(ioeventfds[i]);
    }
    return ret;
#else
    return 0;
#endif
}
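
/*
 * Note on the probe above: ARRAY_SIZE(ioeventfds) is 7, one more than the
 * historical 6-device io bus limit, so only a kernel without that limit
 * can satisfy all seven registrations. The final loop deassigns and closes
 * whatever was set up, so the probe leaves no ioeventfds behind.
 */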
static void kvm_set_phys_mem(target_phys_addr_t start_addr,
                             ram_addr_t size,
                             ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    /* kvm works in page size chunks, but the function may be called
       with sub-page size and unaligned start address. */
    size = TARGET_PAGE_ALIGN(size);
    start_addr = TARGET_PAGE_ALIGN(start_addr);

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }
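
    /*
     * Sketch of the slot splitting done by the loop above (added
     * illustration): when the new region overlaps an existing slot, the
     * old slot is unregistered and up to two fragments of it are
     * re-registered around the new region:
     *
     *   old slot:     |--------------------------------|
     *   new region:           |==============|
     *   result:       |prefix||==============||suffix--|
     */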
    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}
static void kvm_client_set_memory(struct CPUPhysMemoryClient *client,
                                  target_phys_addr_t start_addr,
                                  ram_addr_t size,
                                  ram_addr_t phys_offset)
{
    kvm_set_phys_mem(start_addr, size, phys_offset);
}

static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client,
                                        target_phys_addr_t start_addr,
                                        target_phys_addr_t end_addr)
{
    return kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
}

static int kvm_client_migration_log(struct CPUPhysMemoryClient *client,
                                    int enable)
{
    return kvm_set_migration_log(enable);
}

static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
    .set_memory = kvm_client_set_memory,
    .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
    .migration_log = kvm_client_migration_log,
};

void kvm_cpu_register_phys_memory_client(void)
{
    cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
}
#ifdef OBSOLETE_KVM_IMPL

int kvm_init(int smp_cpus)
{
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    KVMState *s;
    int ret;
    int i;

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = qemu_open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0) {
        ret = s->vmfd;
#ifdef TARGET_S390X
        fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
                        "your host kernel command line\n");
#endif
        goto err;
    }

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this. Modern versions of KVM
     * just use a user allocated buffer so we can use regular pages
     * unmodified. Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                upgrade_note);
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevents memory slots from being
     * destroyed properly.  Since we rely on this capability, refuse to work
     * with any kernel without this capability. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
                upgrade_note);
        goto err;
    }

    s->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
    s->coalesced_mmio_ring = NULL;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    s->vcpu_events = 0;
#ifdef KVM_CAP_VCPU_EVENTS
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif

    s->robust_singlestep = 0;
#ifdef KVM_CAP_X86_ROBUST_SINGLESTEP
    s->robust_singlestep =
        kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);
#endif

    s->debugregs = 0;
#ifdef KVM_CAP_DEBUGREGS
    s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
#endif

    s->xsave = 0;
#ifdef KVM_CAP_XSAVE
    s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
#endif

    s->xcrs = 0;
#ifdef KVM_CAP_XCRS
    s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    kvm_state = s;
    cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);

    s->many_ioeventfds = kvm_check_many_ioeventfds();

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}
#endif
static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
                         uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}
#ifdef KVM_CAP_INTERNAL_ERROR_DATA
static void kvm_handle_internal_error(CPUState *env, struct kvm_run *run)
{
    if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
        int i;

        fprintf(stderr, "KVM internal error. Suberror: %d\n",
                run->internal.suberror);

        for (i = 0; i < run->internal.ndata; ++i) {
            fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
                    i, (uint64_t)run->internal.data[i]);
        }
    }
    cpu_dump_state(env, stderr, fprintf, 0);
    if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) {
        fprintf(stderr, "emulation failure\n");
        if (!kvm_arch_stop_on_emulation_error(env))
            return;
    }
    /* FIXME: Should trigger a qmp message to let management know
     * something went wrong.
     */
    vm_stop(0);
}
#endif
void kvm_flush_coalesced_mmio_buffer(void)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio_ring) {
        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            smp_wmb();
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}
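
/*
 * Note on the ring protocol above (added): the kernel produces entries at
 * ring->last and this loop consumes them at ring->first. The smp_wmb()
 * keeps the replay of an entry ordered before the store to ring->first,
 * since publishing the new ring->first is what hands the slot back to the
 * kernel for reuse.
 */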
#ifdef OBSOLETE_KVM_IMPL

static void do_kvm_cpu_synchronize_state(void *_env)
{
    CPUState *env = _env;

    if (!env->kvm_vcpu_dirty) {
        kvm_arch_get_registers(env);
        env->kvm_vcpu_dirty = 1;
    }
}

void kvm_cpu_synchronize_state(CPUState *env)
{
    if (!env->kvm_vcpu_dirty)
        run_on_cpu(env, do_kvm_cpu_synchronize_state, env);
}

void kvm_cpu_synchronize_post_reset(CPUState *env)
{
    kvm_arch_put_registers(env, KVM_PUT_RESET_STATE);
    env->kvm_vcpu_dirty = 0;
}

void kvm_cpu_synchronize_post_init(CPUState *env)
{
    kvm_arch_put_registers(env, KVM_PUT_FULL_STATE);
    env->kvm_vcpu_dirty = 0;
}
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    DPRINTF("kvm_cpu_exec()\n");

    do {
#ifndef CONFIG_IOTHREAD
        if (env->exit_request) {
            DPRINTF("interrupt exit requested\n");
            ret = 0;
            break;
        }
#endif

        if (kvm_arch_process_irqchip_events(env)) {
            ret = 0;
            break;
        }

        if (env->kvm_vcpu_dirty) {
            kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
            env->kvm_vcpu_dirty = 0;
        }

        kvm_arch_pre_run(env, run);
        cpu_single_env = NULL;
        qemu_mutex_unlock_iothread();
        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        qemu_mutex_lock_iothread();
        cpu_single_env = env;
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            cpu_exit(env);
            DPRINTF("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            DPRINTF("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_flush_coalesced_mmio_buffer();

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            DPRINTF("handle_io\n");
            ret = kvm_handle_io(run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            DPRINTF("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            DPRINTF("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            DPRINTF("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            DPRINTF("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            DPRINTF("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            DPRINTF("kvm_exit_exception\n");
            break;
#ifdef KVM_CAP_INTERNAL_ERROR_DATA
        case KVM_EXIT_INTERNAL_ERROR:
            kvm_handle_internal_error(env, run);
            break;
#endif
        case KVM_EXIT_DEBUG:
            DPRINTF("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            DPRINTF("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

#endif
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}
int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

int kvm_has_vcpu_events(void)
{
    return kvm_state->vcpu_events;
}

int kvm_has_robust_singlestep(void)
{
    return kvm_state->robust_singlestep;
}

int kvm_has_debugregs(void)
{
    return kvm_state->debugregs;
}

int kvm_has_xsave(void)
{
    return kvm_state->xsave;
}

int kvm_has_xcrs(void)
{
    return kvm_state->xcrs;
}

int kvm_has_many_ioeventfds(void)
{
    if (!kvm_enabled()) {
        return 0;
    }
    return kvm_state->many_ioeventfds;
}
void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
        int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK);

        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr,
                    "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
            exit(1);
        }
    }
}
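
/*
 * Background note (added): without KVM_CAP_SYNC_MMU the kernel has no MMU
 * notifiers and works on a fixed mapping of guest RAM. A fork() would mark
 * those pages copy-on-write, and a later write in the parent could move
 * them out from under KVM; QEMU_MADV_DONTFORK keeps guest RAM out of the
 * child so this cannot happen.
 */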
#ifdef KVM_CAP_SET_GUEST_DEBUG

#ifndef OBSOLETE_KVM_IMPL
#define run_on_cpu on_vcpu
#endif /* !OBSOLETE_KVM_IMPL */

struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg;
    CPUState *env;
    int err;
};

static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *dbg_data = data;
    CPUState *env = dbg_data->env;

    dbg_data->err = kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
}

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data data;

    data.dbg.control = reinject_trap;

    if (env->singlestep_enabled) {
        data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
    }
    kvm_arch_update_guest_debug(env, &data.dbg);
    data.env = env;

    run_on_cpu(env, kvm_invoke_set_guest_debug, &data);
    return data.err;
}
int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                           bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}
int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}
void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}
#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}

#endif /* !KVM_CAP_SET_GUEST_DEBUG */
int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset)
        return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);

    sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));

    sigmask->len = 8; /* the kernel's sigset is 8 bytes, not sizeof(sigset_t) */
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
    free(sigmask);

    return r;
}
int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign)
{
#ifdef KVM_IOEVENTFD
    int ret;
    struct kvm_ioeventfd iofd;

    iofd.datamatch = val;
    iofd.addr = addr;
    iofd.len = 4;
    iofd.flags = KVM_IOEVENTFD_FLAG_DATAMATCH;
    iofd.fd = fd;

    if (!kvm_enabled()) {
        return -ENOSYS;
    }

    if (!assign) {
        iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd);

    if (ret < 0) {
        return -errno;
    }

    return 0;
#else
    return -ENOSYS;
#endif
}
int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
{
#ifdef KVM_IOEVENTFD
    struct kvm_ioeventfd kick = {
        .datamatch = val,
        .addr = addr,
        .len = 2,
        .flags = KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO,
        .fd = fd,
    };
    int r;
    if (!kvm_enabled())
        return -ENOSYS;
    if (!assign)
        kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
    r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
    if (r < 0)
        return r;
    return 0;
#else
    return -ENOSYS;
#endif
}
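
/*
 * Illustrative usage sketch (not part of the original file; the port
 * number and value are made up). A device model can have the kernel
 * signal an eventfd, instead of exiting to userspace, whenever the guest
 * writes the 16-bit value 0 to port 0x510:
 *
 *     int efd = eventfd(0, EFD_CLOEXEC);
 *     if (efd >= 0 && kvm_set_ioeventfd_pio_word(efd, 0x510, 0, true) == 0) {
 *         ... poll efd from another thread, read(2) it to clear ...
 *     }
 */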
#if defined(KVM_IRQFD)
int kvm_set_irqfd(int gsi, int fd, bool assigned)
{
    struct kvm_irqfd irqfd = {
        .fd = fd,
        .gsi = gsi,
        .flags = assigned ? 0 : KVM_IRQFD_FLAG_DEASSIGN,
    };
    int r;
    if (!kvm_enabled() || !kvm_irqchip_in_kernel())
        return -ENOSYS;

    r = kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd);
    if (r < 0)
        return r;
    return 0;
}
#endif
#undef PAGE_SIZE
#include "qemu-kvm.c"