/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu-barrier.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

#ifdef KVM_UPSTREAM

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
#endif
    int broken_set_mem_region;
    int migration_log;
    int vcpu_events;
    int robust_singlestep;
    int debugregs;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
    int irqchip_in_kernel;
    int pit_in_kernel;
};

static KVMState *kvm_state;

static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}

static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}

static void kvm_reset_vcpu(void *opaque)
{
    CPUState *env = opaque;

    kvm_arch_reset_vcpu(env);
}
#endif

int kvm_irqchip_in_kernel(void)
{
    return kvm_state->irqchip_in_kernel;
}

int kvm_pit_in_kernel(void)
{
    return kvm_state->pit_in_kernel;
}

#ifdef KVM_UPSTREAM
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    if (s->coalesced_mmio && !s->coalesced_mmio_ring)
        s->coalesced_mmio_ring = (void *) env->kvm_run +
            s->coalesced_mmio * PAGE_SIZE;
#endif

    ret = kvm_arch_init_vcpu(env);
    if (ret == 0) {
        qemu_register_reset(kvm_reset_vcpu, env);
        kvm_arch_reset_vcpu(env);
    }
err:
    return ret;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                (target_phys_addr_t)(phys_addr + size - 1));
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing changed effectively, no need to issue ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

static int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

static int test_le_bit(unsigned long nr, unsigned char *addr)
{
    return (addr[nr >> 3] >> (nr & 7)) & 1;
}

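/*
 * A minimal worked example (editorial, not from the original source) of the
 * little-endian bit layout assumed above: page number 11 lands in byte
 * 11 >> 3 == 1 at bit 11 & 7 == 3, so a bitmap whose second byte is 0x08
 * reports page 11 as dirty:
 *
 *     unsigned char bitmap[2] = { 0x00, 0x08 };
 *     test_le_bit(11, bitmap);    returns 1
 *     test_le_bit(10, bitmap);    returns 0
 */
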
/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty().
 * This means all bits are set to dirty.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                          target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    target_phys_addr_t phys_addr;
    ram_addr_t addr;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
            dprintf("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
             phys_addr < mem->start_addr + mem->memory_size;
             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
            unsigned char *bitmap = (unsigned char *)d.dirty_bitmap;
            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;

            if (test_le_bit(nr, bitmap)) {
                cpu_physical_memory_set_dirty(addr);
            }
        }
        start_addr = phys_addr;
    }
    qemu_free(d.dirty_bitmap);

    return ret;
}

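/*
 * A note on the sizing above: KVM_GET_DIRTY_LOG reports one bit per target
 * page, so the buffer is rounded up to whole bytes. Assuming 4 KiB target
 * pages, a 1 MiB slot covers 256 pages and needs (256 + 7) / 8 == 32 bytes
 * of bitmap.
 */
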
#endif

int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}

#ifdef KVM_UPSTREAM

static void kvm_set_phys_mem(target_phys_addr_t start_addr,
                             ram_addr_t size,
                             ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }
        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}

#endif

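/*
 * A sketch of the prefix/suffix split above, with hypothetical addresses:
 * if an existing slot covers [0x100000, 0x500000) and a new region
 * [0x200000, 0x300000) is registered, the old slot is unregistered and
 * re-registered as two fragments, with phys_offset advanced by the same
 * deltas:
 *
 *     prefix: start_addr = 0x100000, memory_size = 0x100000
 *     suffix: start_addr = 0x300000, size_delta  = 0x200000,
 *             memory_size = 0x400000 - 0x200000  = 0x200000
 */
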
static void kvm_client_set_memory(struct CPUPhysMemoryClient *client,
                                  target_phys_addr_t start_addr,
                                  ram_addr_t size,
                                  ram_addr_t phys_offset)
{
    kvm_set_phys_mem(start_addr, size, phys_offset);
}

static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client,
                                        target_phys_addr_t start_addr,
                                        target_phys_addr_t end_addr)
{
    return kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
}

static int kvm_client_migration_log(struct CPUPhysMemoryClient *client,
                                    int enable)
{
    return kvm_set_migration_log(enable);
}

static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
    .set_memory = kvm_client_set_memory,
    .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
    .migration_log = kvm_client_migration_log,
};

void kvm_cpu_register_phys_memory_client(void)
{
    cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
}

#ifdef KVM_UPSTREAM

int kvm_init(int smp_cpus)
{
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = qemu_open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0) {
#ifdef TARGET_S390X
        fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
                        "your host kernel command line\n");
#endif
        goto err;
    }

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this. Modern versions of KVM
     * just use a user allocated buffer so we can use regular pages
     * unmodified. Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                upgrade_note);
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevents memory slots from being
     * destroyed properly. Since we rely on this capability, refuse to work
     * with any kernel without this capability. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
                upgrade_note);
        goto err;
    }

    s->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
    s->coalesced_mmio_ring = NULL;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    s->vcpu_events = 0;
#ifdef KVM_CAP_VCPU_EVENTS
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif

    s->robust_singlestep = 0;
#ifdef KVM_CAP_X86_ROBUST_SINGLESTEP
    s->robust_singlestep =
        kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);
#endif

    s->debugregs = 0;
#ifdef KVM_CAP_DEBUGREGS
    s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    kvm_state = s;
    cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}
#endif

static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
                         uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

void kvm_flush_coalesced_mmio_buffer(void)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio_ring) {
        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            smp_wmb();
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

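/*
 * Note on the barrier above: smp_wmb() orders the completion of the entry
 * processing before the updated ring->first becomes visible, since the
 * in-kernel producer may reuse an entry as soon as it observes the consumer
 * index advance past it.
 */
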
#ifdef KVM_UPSTREAM

void kvm_cpu_synchronize_state(CPUState *env)
{
    if (!env->kvm_vcpu_dirty) {
        kvm_arch_get_registers(env);
        env->kvm_vcpu_dirty = 1;
    }
}

void kvm_cpu_synchronize_post_reset(CPUState *env)
{
    kvm_arch_put_registers(env, KVM_PUT_RESET_STATE);
    env->kvm_vcpu_dirty = 0;
}

void kvm_cpu_synchronize_post_init(CPUState *env)
{
    kvm_arch_put_registers(env, KVM_PUT_FULL_STATE);
    env->kvm_vcpu_dirty = 0;
}

int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
#ifndef CONFIG_IOTHREAD
        if (env->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }
#endif

        if (env->kvm_vcpu_dirty) {
            kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
            env->kvm_vcpu_dirty = 0;
        }

        kvm_arch_pre_run(env, run);
        qemu_mutex_unlock_iothread();
        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        qemu_mutex_lock_iothread();
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            cpu_exit(env);
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_flush_coalesced_mmio_buffer();
        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

#endif

int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

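/*
 * All three wrappers return the raw ioctl result, or -errno on failure, so
 * callers can test the sign directly. A minimal usage sketch (assuming an
 * initialized kvm_state):
 *
 *     int ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
 *     if (ret < 0) {
 *         fprintf(stderr, "ioctl failed: %s\n", strerror(-ret));
 *     }
 */
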
int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

int kvm_has_vcpu_events(void)
{
    return kvm_state->vcpu_events;
}

int kvm_has_robust_singlestep(void)
{
    return kvm_state->robust_singlestep;
}

int kvm_has_debugregs(void)
{
    return kvm_state->debugregs;
}

void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}

#ifdef KVM_CAP_SET_GUEST_DEBUG

#ifdef KVM_UPSTREAM
static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
{
#ifdef CONFIG_IOTHREAD
    if (env != cpu_single_env) {
        abort();
    }
#endif
    func(data);
}
#else /* !KVM_UPSTREAM */
static void on_vcpu(CPUState *env, void (*func)(void *data), void *data);
#endif /* !KVM_UPSTREAM */

struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg;
    CPUState *env;
    int err;
};

static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *dbg_data = data;
    CPUState *env = dbg_data->env;

    dbg_data->err = kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
}

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data data;

    data.dbg.control = reinject_trap;

    if (env->singlestep_enabled) {
        data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
    }
    kvm_arch_update_guest_debug(env, &data.dbg);
    data.env = env;

    on_vcpu(env, kvm_invoke_set_guest_debug, &data);
    return data.err;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                           bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset)
        return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);

    sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));

    sigmask->len = 8;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
    free(sigmask);

    return r;
}

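/*
 * Note: struct kvm_signal_mask ends in a flexible array member, hence the
 * sizeof(*sigmask) + sizeof(*sigset) allocation above. len is fixed at 8
 * because the kernel expects the size of its own 64-bit sigset, not glibc's
 * larger sigset_t.
 */
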
#ifdef KVM_IOEVENTFD
int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
{
    struct kvm_ioeventfd kick = {
        .datamatch = val,
        .addr = addr,
        .len = 2,
        .flags = KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO,
        .fd = fd,
    };
    int r;
    if (!kvm_enabled())
        return -ENOSYS;
    if (!assign)
        kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
    r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
    if (r < 0)
        return r;
    return 0;
}
#endif

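/*
 * A usage sketch for the helper above, with a hypothetical doorbell at
 * I/O port 0x510 matching the value 1 (both made up for illustration):
 *
 *     #include <sys/eventfd.h>
 *
 *     int efd = eventfd(0, 0);
 *     if (efd >= 0 && kvm_set_ioeventfd_pio_word(efd, 0x510, 1, true) == 0) {
 *         16-bit guest writes of 1 to port 0x510 now signal efd directly
 *         in the kernel, without a heavyweight exit to userspace.
 *     }
 */
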
#if defined(KVM_IRQFD)
int kvm_set_irqfd(int gsi, int fd, bool assigned)
{
    struct kvm_irqfd irqfd = {
        .fd = fd,
        .gsi = gsi,
        .flags = assigned ? 0 : KVM_IRQFD_FLAG_DEASSIGN,
    };
    int r;
    if (!kvm_enabled() || !kvm_irqchip_in_kernel())
        return -ENOSYS;

    r = kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd);
    if (r < 0)
        return r;
    return 0;
}
#endif

#undef PAGE_SIZE
#include "qemu-kvm.c"