One CONFIG_EVENTFD should be enough
[qemu-kvm/amd-iommu.git] / qemu-kvm.c
blob 4fdb6e60e1c19c66efd2d99a1aac1ca24d88a618
/*
 * qemu/kvm integration
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 *
 * Licensed under the terms of the GNU GPL version 2 or higher.
 */
#include "config.h"
#include "config-host.h"

#include <assert.h>
#include <string.h>
#include "hw/hw.h"
#include "sysemu.h"
#include "qemu-common.h"
#include "console.h"
#include "block.h"
#include "compatfd.h"
#include "gdbstub.h"

#include "qemu-kvm.h"
#include "libkvm.h"

#include <pthread.h>
#include <sys/utsname.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <signal.h>

#define false 0
#define true 1

#define EXPECTED_KVM_API_VERSION 12

#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
#error libkvm: userspace and kernel version mismatch
#endif

int kvm_allowed = 1;
int kvm_irqchip = 1;
int kvm_pit = 1;
int kvm_pit_reinject = 1;
int kvm_nested = 0;

KVMState *kvm_state;
kvm_context_t kvm_context;

pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
__thread CPUState *current_env;

static int qemu_system_ready;

#define SIG_IPI (SIGRTMIN+4)

pthread_t io_thread;
static int io_thread_fd = -1;
static int io_thread_sigfd = -1;

static CPUState *kvm_debug_cpu_requested;

static uint64_t phys_ram_size;

#ifdef USE_KVM_DEVICE_ASSIGNMENT
/* The list of ioperm_data */
static QLIST_HEAD(, ioperm_data) ioperm_head;
#endif

//#define DEBUG_MEMREG
#ifdef DEBUG_MEMREG
#define DPRINTF(fmt, args...) \
    do { fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ##args); } while (0)
#else
#define DPRINTF(fmt, args...) do {} while (0)
#endif

#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
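
/*
 * Worked example (informative): ALIGN rounds x up to the next multiple of
 * the power-of-two y, e.g. ALIGN(33, 32) == 64 and ALIGN(32, 32) == 32.
 * kvm_init() below relies on this to size used_gsi_bitmap in whole 32-bit
 * words so the bitmap can later be scanned with ffs().
 */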

int kvm_abi = EXPECTED_KVM_API_VERSION;
int kvm_page_size;

#ifdef KVM_CAP_SET_GUEST_DEBUG
static int kvm_debug(void *opaque, void *data,
                     struct kvm_debug_exit_arch *arch_info)
{
    int handle = kvm_arch_debug(arch_info);
    CPUState *env = data;

    if (handle) {
        kvm_debug_cpu_requested = env;
        env->stopped = 1;
    }
    return handle;
}
#endif

int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 0);
    return 0;
}

int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 1);
    return 0;
}

static int handle_unhandled(uint64_t reason)
{
    fprintf(stderr, "kvm: unhandled exit %" PRIx64 "\n", reason);
    return -EINVAL;
}

static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
{
    uint32_t *bitmap = kvm->used_gsi_bitmap;

    if (gsi < kvm->max_gsi)
        bitmap[gsi / 32] |= 1U << (gsi % 32);
    else
        DPRINTF("Invalid GSI %u\n", gsi);
}

static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
{
    uint32_t *bitmap = kvm->used_gsi_bitmap;

    if (gsi < kvm->max_gsi)
        bitmap[gsi / 32] &= ~(1U << (gsi % 32));
    else
        DPRINTF("Invalid GSI %u\n", gsi);
}

struct slot_info {
    unsigned long phys_addr;
    unsigned long len;
    unsigned long userspace_addr;
    unsigned flags;
    int logging_count;
};

struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];

static void init_slots(void)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        slots[i].len = 0;
}

static int get_free_slot(kvm_context_t kvm)
{
    int i;
    int tss_ext;

#if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
    tss_ext = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
#else
    tss_ext = 0;
#endif

    /*
     * On older kernels where the set tss ioctl is not supported we must save
     * slot 0 to hold the extended memory, as the vmx will use the last 3
     * pages of this slot.
     */
    if (tss_ext > 0)
        i = 0;
    else
        i = 1;

    for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        if (!slots[i].len)
            return i;
    return -1;
}

static void register_slot(int slot, unsigned long phys_addr,
                          unsigned long len, unsigned long userspace_addr,
                          unsigned flags)
{
    slots[slot].phys_addr = phys_addr;
    slots[slot].len = len;
    slots[slot].userspace_addr = userspace_addr;
    slots[slot].flags = flags;
}

static void free_slot(int slot)
{
    slots[slot].len = 0;
    slots[slot].logging_count = 0;
}

static int get_slot(unsigned long phys_addr)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
        if (slots[i].len && slots[i].phys_addr <= phys_addr &&
            (slots[i].phys_addr + slots[i].len - 1) >= phys_addr)
            return i;
    }
    return -1;
}

/* Returns the number of a slot that wholly contains the given region,
 * or -1 if the region is not fully contained in any slot */
static int get_container_slot(uint64_t phys_addr, unsigned long size)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        if (slots[i].len && slots[i].phys_addr <= phys_addr &&
            (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
            return i;
    return -1;
}

int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr,
                             unsigned long size)
{
    int slot = get_container_slot(phys_addr, size);

    if (slot == -1)
        return 0;
    return 1;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(kvm_context_t kvm,
                                      unsigned long phys_addr, unsigned flags,
                                      unsigned mask)
{
    int r = -1;
    int slot = get_slot(phys_addr);

    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
        return 1;
    }

    flags = (slots[slot].flags & ~mask) | flags;
    if (flags == slots[slot].flags)
        return 0;
    slots[slot].flags = flags;

    {
        struct kvm_userspace_memory_region mem = {
            .slot = slot,
            .memory_size = slots[slot].len,
            .guest_phys_addr = slots[slot].phys_addr,
            .userspace_addr = slots[slot].userspace_addr,
            .flags = slots[slot].flags,
        };

        DPRINTF("slot %d start %llx len %llx flags %x\n",
                mem.slot, mem.guest_phys_addr, mem.memory_size, mem.flags);
        r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &mem);
        if (r < 0)
            fprintf(stderr, "%s: %m\n", __FUNCTION__);
    }
    return r;
}

static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
                                          int (*change)(kvm_context_t kvm,
                                                        uint64_t start,
                                                        uint64_t len))
{
    int i, r;

    for (i = r = 0; i < KVM_MAX_NUM_MEM_REGIONS && r == 0; i++) {
        if (slots[i].len)
            r = change(kvm, slots[i].phys_addr, slots[i].len);
    }
    return r;
}

int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm, uint64_t phys_addr,
                                    uint64_t len)
{
    int slot = get_slot(phys_addr);

    DPRINTF("start %" PRIx64 " len %" PRIx64 "\n", phys_addr, len);
    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return -EINVAL;
    }

    if (slots[slot].logging_count++)
        return 0;

    return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm, uint64_t phys_addr,
                                     uint64_t len)
{
    int slot = get_slot(phys_addr);

    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return -EINVAL;
    }

    if (--slots[slot].logging_count)
        return 0;

    return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr, 0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

/*
 * Enable dirty page logging for all memory regions
 */
int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
{
    if (kvm->dirty_pages_log_all)
        return 0;
    kvm->dirty_pages_log_all = 1;
    return kvm_dirty_pages_log_change_all(kvm, kvm_dirty_pages_log_enable_slot);
}

/*
 * Enable dirty page logging only for memory regions that were created with
 * dirty logging enabled (disable for all other memory regions).
 */
int kvm_dirty_pages_log_reset(kvm_context_t kvm)
{
    if (!kvm->dirty_pages_log_all)
        return 0;
    kvm->dirty_pages_log_all = 0;
    return kvm_dirty_pages_log_change_all(kvm,
                                          kvm_dirty_pages_log_disable_slot);
}
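
/*
 * Illustrative sketch (not compiled): how the per-slot logging_count
 * reference count above is meant to be used.  Enable/disable calls must be
 * paired; only the first enable and the last disable reach the kernel via
 * KVM_SET_USER_MEMORY_REGION.  The address and length below are made up.
 */
#if 0
static void dirty_log_refcount_example(kvm_context_t kvm)
{
    uint64_t addr = 0x100000, len = 0x10000;          /* hypothetical slot */

    kvm_dirty_pages_log_enable_slot(kvm, addr, len);  /* ioctl issued */
    kvm_dirty_pages_log_enable_slot(kvm, addr, len);  /* refcount only */
    kvm_dirty_pages_log_disable_slot(kvm, addr, len); /* refcount only */
    kvm_dirty_pages_log_disable_slot(kvm, addr, len); /* ioctl issued */
}
#endif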

static int kvm_create_context(void);

int kvm_init(int smp_cpus)
{
    int fd;
    int r, gsi_count;

    fd = open("/dev/kvm", O_RDWR);
    if (fd == -1) {
        perror("open /dev/kvm");
        return -1;
    }
    r = ioctl(fd, KVM_GET_API_VERSION, 0);
    if (r == -1) {
        fprintf(stderr,
                "kvm kernel version too old: "
                "KVM_GET_API_VERSION ioctl not supported\n");
        goto out_close;
    }
    if (r < EXPECTED_KVM_API_VERSION) {
        fprintf(stderr, "kvm kernel version too old: "
                "We expect API version %d or newer, but got "
                "version %d\n", EXPECTED_KVM_API_VERSION, r);
        goto out_close;
    }
    if (r > EXPECTED_KVM_API_VERSION) {
        fprintf(stderr, "kvm userspace version too old\n");
        goto out_close;
    }
    kvm_abi = r;
    kvm_page_size = getpagesize();
    kvm_state = qemu_mallocz(sizeof(*kvm_state));
    kvm_context = &kvm_state->kvm_context;

    kvm_state->fd = fd;
    kvm_state->vmfd = -1;
    kvm_context->opaque = cpu_single_env;
    kvm_context->dirty_pages_log_all = 0;
    kvm_context->no_irqchip_creation = 0;
    kvm_context->no_pit_creation = 0;

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&kvm_state->kvm_sw_breakpoints);
#endif

    gsi_count = kvm_get_gsi_count(kvm_context);
    if (gsi_count > 0) {
        int gsi_bits, i;

        /* Round up so we can search ints using ffs */
        gsi_bits = ALIGN(gsi_count, 32);
        kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
        kvm_context->max_gsi = gsi_bits;

        /* Mark any over-allocated bits as already in use */
        for (i = gsi_count; i < gsi_bits; i++)
            set_gsi(kvm_context, i);
    }

    pthread_mutex_lock(&qemu_mutex);
    return kvm_create_context();

out_close:
    close(fd);
    return -1;
}

static void kvm_finalize(KVMState *s)
{
    /* FIXME
    if (kvm->vcpu_fd[0] != -1)
        close(kvm->vcpu_fd[0]);
    if (kvm->vm_fd != -1)
        close(kvm->vm_fd);
    */
    close(s->fd);
    free(s);
}

void kvm_disable_irqchip_creation(kvm_context_t kvm)
{
    kvm->no_irqchip_creation = 1;
}

void kvm_disable_pit_creation(kvm_context_t kvm)
{
    kvm->no_pit_creation = 1;
}

kvm_vcpu_context_t kvm_create_vcpu(CPUState *env, int id)
{
    long mmap_size;
    int r;
    kvm_vcpu_context_t vcpu_ctx = qemu_malloc(sizeof(struct kvm_vcpu_context));
    kvm_context_t kvm = kvm_context;

    vcpu_ctx->kvm = kvm;
    vcpu_ctx->id = id;

    r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
    if (r < 0) {
        fprintf(stderr, "kvm_create_vcpu: %m\n");
        goto err;
    }
    vcpu_ctx->fd = r;

    env->kvm_fd = r;
    env->kvm_state = kvm_state;

    mmap_size = kvm_ioctl(kvm_state, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        fprintf(stderr, "get vcpu mmap size: %m\n");
        goto err_fd;
    }
    vcpu_ctx->run =
        mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_ctx->fd,
             0);
    if (vcpu_ctx->run == MAP_FAILED) {
        fprintf(stderr, "mmap vcpu area: %m\n");
        goto err_fd;
    }
    return vcpu_ctx;
err_fd:
    close(vcpu_ctx->fd);
err:
    free(vcpu_ctx);
    return NULL;
}

static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
{
#ifdef KVM_CAP_SET_BOOT_CPU_ID
    int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
    if (r > 0)
        return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id);
    return -ENOSYS;
#else
    return -ENOSYS;
#endif
}

int kvm_create_vm(kvm_context_t kvm)
{
    int fd;

#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes = qemu_mallocz(sizeof(*kvm->irq_routes));
    kvm->nr_allocated_irq_routes = 0;
#endif

    fd = kvm_ioctl(kvm_state, KVM_CREATE_VM, 0);
    if (fd < 0) {
        fprintf(stderr, "kvm_create_vm: %m\n");
        return -1;
    }
    kvm_state->vmfd = fd;
    return 0;
}

static int kvm_create_default_phys_mem(kvm_context_t kvm,
                                       unsigned long phys_mem_bytes,
                                       void **vm_mem)
{
#ifdef KVM_CAP_USER_MEMORY
    int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
    if (r > 0)
        return 0;
    fprintf(stderr,
            "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
#else
#error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
#endif
    return -1;
}

void kvm_create_irqchip(kvm_context_t kvm)
{
    int r;

    kvm->irqchip_in_kernel = 0;
#ifdef KVM_CAP_IRQCHIP
    if (!kvm->no_irqchip_creation) {
        r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
        if (r > 0) {    /* kernel irqchip supported */
            r = kvm_vm_ioctl(kvm_state, KVM_CREATE_IRQCHIP);
            if (r >= 0) {
                kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
#if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
                r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
                              KVM_CAP_IRQ_INJECT_STATUS);
                if (r > 0)
                    kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
#endif
                kvm->irqchip_in_kernel = 1;
            } else
                fprintf(stderr, "Create kernel PIC irqchip failed\n");
        }
    }
#endif
}

int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
{
    int r;

    r = kvm_create_vm(kvm);
    if (r < 0)
        return r;
    r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
    if (r < 0)
        return r;
    init_slots();
    r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
    if (r < 0)
        return r;
    kvm_create_irqchip(kvm);

    return 0;
}

int kvm_register_phys_mem(kvm_context_t kvm,
                          unsigned long phys_start, void *userspace_addr,
                          unsigned long len, int log)
{
    struct kvm_userspace_memory_region memory = {
        .memory_size = len,
        .guest_phys_addr = phys_start,
        .userspace_addr = (unsigned long) (intptr_t) userspace_addr,
        .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
    };
    int r;

    memory.slot = get_free_slot(kvm);
    DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n",
            memory.guest_phys_addr, memory.memory_size, memory.userspace_addr,
            memory.slot, memory.flags);
    r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r < 0) {
        fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(-r));
        return -1;
    }
    register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
                  memory.userspace_addr, memory.flags);
    return 0;
}

/* destroy/free a whole slot.
 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
 */
void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len)
{
    int slot;
    int r;
    struct kvm_userspace_memory_region memory = {
        .memory_size = 0,
        .guest_phys_addr = phys_start,
        .userspace_addr = 0,
        .flags = 0,
    };

    slot = get_slot(phys_start);

    if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
        fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n", __FUNCTION__,
                slot);
        return;
    }
    if (phys_start != slots[slot].phys_addr) {
        fprintf(stderr,
                "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
                __FUNCTION__, phys_start, slots[slot].phys_addr);
        phys_start = slots[slot].phys_addr;
    }

    memory.slot = slot;
    DPRINTF("slot %d start %llx len %llx flags %x\n",
            memory.slot, memory.guest_phys_addr, memory.memory_size,
            memory.flags);
    r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r < 0) {
        fprintf(stderr, "destroy_userspace_phys_mem: %s\n", strerror(-r));
        return;
    }

    free_slot(memory.slot);
}

void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr,
                                unsigned long size)
{
    int slot = get_container_slot(phys_addr, size);

    if (slot != -1) {
        DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size);
        kvm_destroy_phys_mem(kvm, phys_addr, size);
        return;
    }
}

static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
{
    int r;
    struct kvm_dirty_log log = {
        .slot = slot,
    };

    log.dirty_bitmap = buf;

    r = kvm_vm_ioctl(kvm_state, ioctl_num, &log);
    if (r < 0)
        return r;
    return 0;
}

int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
{
    int slot;

    slot = get_slot(phys_addr);
    return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
}

int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
                              unsigned long len, void *opaque,
                              int (*cb)(unsigned long start,
                                        unsigned long len, void *bitmap,
                                        void *opaque))
{
    int i;
    int r;
    unsigned long end_addr = phys_addr + len;
    void *buf;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
        if ((slots[i].len && (uint64_t) slots[i].phys_addr >= phys_addr)
            && ((uint64_t) slots[i].phys_addr + slots[i].len <= end_addr)) {
            buf = qemu_malloc((slots[i].len / 4096 + 7) / 8 + 2);
            r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
            if (r) {
                qemu_free(buf);
                return r;
            }
            r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
            qemu_free(buf);
            if (r)
                return r;
        }
    }
    return 0;
}

#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
{
    struct kvm_irq_level event;
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    event.level = level;
    event.irq = irq;
    r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event);
    if (r < 0)
        perror("kvm_set_irq_level");

    if (status) {
#ifdef KVM_CAP_IRQ_INJECT_STATUS
        *status =
            (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
#else
        *status = 1;
#endif
    }

    return 1;
}

int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
    if (r < 0) {
        perror("kvm_get_irqchip");
    }
    return r;
}

int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
    if (r < 0) {
        perror("kvm_set_irqchip");
    }
    return r;
}

#endif

static int handle_io(kvm_vcpu_context_t vcpu)
{
    struct kvm_run *run = vcpu->run;
    kvm_context_t kvm = vcpu->kvm;
    uint16_t addr = run->io.port;
    int i;
    void *p = (void *) run + run->io.data_offset;

    for (i = 0; i < run->io.count; ++i) {
        switch (run->io.direction) {
        case KVM_EXIT_IO_IN:
            switch (run->io.size) {
            case 1:
                *(uint8_t *) p = cpu_inb(kvm->opaque, addr);
                break;
            case 2:
                *(uint16_t *) p = cpu_inw(kvm->opaque, addr);
                break;
            case 4:
                *(uint32_t *) p = cpu_inl(kvm->opaque, addr);
                break;
            default:
                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                return -EMSGSIZE;
            }
            break;
        case KVM_EXIT_IO_OUT:
            switch (run->io.size) {
            case 1:
                cpu_outb(kvm->opaque, addr, *(uint8_t *) p);
                break;
            case 2:
                cpu_outw(kvm->opaque, addr, *(uint16_t *) p);
                break;
            case 4:
                cpu_outl(kvm->opaque, addr, *(uint32_t *) p);
                break;
            default:
                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                return -EMSGSIZE;
            }
            break;
        default:
            fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
            return -EPROTO;
        }

        p += run->io.size;
    }

    return 0;
}
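
/*
 * Note on the layout handle_io() relies on: for a PIO exit the kernel places
 * the data buffer inside the same mmap'ed area as struct kvm_run itself, at
 * run + run->io.data_offset.  A string instruction (count > 1) packs the
 * elements back to back, which is why p advances by run->io.size per
 * iteration.
 */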

int handle_debug(kvm_vcpu_context_t vcpu, void *env)
{
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_run *run = vcpu->run;
    kvm_context_t kvm = vcpu->kvm;

    return kvm_debug(kvm->opaque, env, &run->debug.arch);
#else
    return 0;
#endif
}

int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
{
    return ioctl(vcpu->fd, KVM_GET_REGS, regs);
}

int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
{
    return ioctl(vcpu->fd, KVM_SET_REGS, regs);
}

int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
{
    return ioctl(vcpu->fd, KVM_GET_FPU, fpu);
}

int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
{
    return ioctl(vcpu->fd, KVM_SET_FPU, fpu);
}

int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
{
    return ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
}

int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
{
    return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
}

#ifdef KVM_CAP_MP_STATE
int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
{
    int r;

    r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
    if (r > 0)
        return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state);
    return -ENOSYS;
}

int kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
{
    int r;

    r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
    if (r > 0)
        return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
    return -ENOSYS;
}
#endif

static int handle_mmio(kvm_vcpu_context_t vcpu)
{
    unsigned long addr = vcpu->run->mmio.phys_addr;
    kvm_context_t kvm = vcpu->kvm;
    struct kvm_run *kvm_run = vcpu->run;
    void *data = kvm_run->mmio.data;

    /* hack: Red Hat 7.1 generates these weird accesses. */
    if ((addr > 0xa0000 - 4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
        return 0;

    if (kvm_run->mmio.is_write)
        return kvm_mmio_write(kvm->opaque, addr, data, kvm_run->mmio.len);
    else
        return kvm_mmio_read(kvm->opaque, addr, data, kvm_run->mmio.len);
}

int handle_io_window(kvm_context_t kvm)
{
    return 1;
}

int handle_halt(kvm_vcpu_context_t vcpu)
{
    return kvm_arch_halt(vcpu->kvm->opaque, vcpu);
}

int handle_shutdown(kvm_context_t kvm, CPUState *env)
{
    /* stop the current vcpu from going back to guest mode */
    env->stopped = 1;

    qemu_system_reset_request();
    return 1;
}

static inline void push_nmi(kvm_context_t kvm)
{
#ifdef KVM_CAP_USER_NMI
    kvm_arch_push_nmi(kvm->opaque);
#endif /* KVM_CAP_USER_NMI */
}

void post_kvm_run(kvm_context_t kvm, CPUState *env)
{
    pthread_mutex_lock(&qemu_mutex);
    kvm_arch_post_kvm_run(kvm->opaque, env);
}

int pre_kvm_run(kvm_context_t kvm, CPUState *env)
{
    kvm_arch_pre_kvm_run(kvm->opaque, env);

    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}

int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu)
{
    return vcpu->run->if_flag;
}

int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu)
{
    return vcpu->run->ready_for_interrupt_injection;
}

int kvm_run(kvm_vcpu_context_t vcpu, void *env)
{
    int r;
    int fd = vcpu->fd;
    struct kvm_run *run = vcpu->run;
    kvm_context_t kvm = vcpu->kvm;
    CPUState *_env = env;

again:
    push_nmi(kvm);
#if !defined(__s390__)
    if (!kvm->irqchip_in_kernel)
        run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
#endif

    if (_env->kvm_cpu_state.regs_modified) {
        kvm_arch_put_registers(_env);
        _env->kvm_cpu_state.regs_modified = 0;
    }

    r = pre_kvm_run(kvm, env);
    if (r)
        return r;
    r = ioctl(fd, KVM_RUN, 0);

    if (r == -1 && errno != EINTR && errno != EAGAIN) {
        r = -errno;
        post_kvm_run(kvm, env);
        fprintf(stderr, "kvm_run: %s\n", strerror(-r));
        return r;
    }

    post_kvm_run(kvm, env);

#if defined(KVM_CAP_COALESCED_MMIO)
    if (kvm_state->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring =
            (void *) run + kvm_state->coalesced_mmio * PAGE_SIZE;
        while (ring->first != ring->last) {
            kvm_mmio_write(kvm->opaque,
                           ring->coalesced_mmio[ring->first].phys_addr,
                           &ring->coalesced_mmio[ring->first].data[0],
                           ring->coalesced_mmio[ring->first].len);
            smp_wmb();
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif

#if !defined(__s390__)
    if (r == -1) {
        r = handle_io_window(kvm);
        goto more;
    }
#endif
    switch (run->exit_reason) {
    case KVM_EXIT_UNKNOWN:
        r = handle_unhandled(run->hw.hardware_exit_reason);
        break;
    case KVM_EXIT_FAIL_ENTRY:
        r = handle_unhandled(run->fail_entry.hardware_entry_failure_reason);
        break;
    case KVM_EXIT_EXCEPTION:
        fprintf(stderr, "exception %d (%x)\n", run->ex.exception,
                run->ex.error_code);
        kvm_show_regs(vcpu);
        kvm_show_code(vcpu);
        abort();
        break;
    case KVM_EXIT_IO:
        r = handle_io(vcpu);
        break;
    case KVM_EXIT_DEBUG:
        r = handle_debug(vcpu, env);
        break;
    case KVM_EXIT_MMIO:
        r = handle_mmio(vcpu);
        break;
    case KVM_EXIT_HLT:
        r = handle_halt(vcpu);
        break;
    case KVM_EXIT_IRQ_WINDOW_OPEN:
        break;
    case KVM_EXIT_SHUTDOWN:
        r = handle_shutdown(kvm, env);
        break;
#if defined(__s390__)
    case KVM_EXIT_S390_SIEIC:
        r = kvm_s390_handle_intercept(kvm, vcpu, run);
        break;
    case KVM_EXIT_S390_RESET:
        r = kvm_s390_handle_reset(kvm, vcpu, run);
        break;
#endif
    case KVM_EXIT_INTERNAL_ERROR:
        fprintf(stderr, "KVM internal error. Suberror: %d\n",
                run->internal.suberror);
        kvm_show_regs(vcpu);
        if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION)
            fprintf(stderr, "emulation failure, check dmesg for details\n");
        abort();
        break;
    default:
        if (kvm_arch_run(vcpu)) {
            fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason);
            kvm_show_regs(vcpu);
            abort();
        }
        break;
    }
more:
    if (!r)
        goto again;
    return r;
}
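
/*
 * Illustrative sketch (not compiled): the bare KVM_RUN protocol that
 * kvm_run() above wraps.  Locking, register sync, coalesced MMIO and signal
 * handling are omitted; the exit dispatch mirrors the switch above.
 */
#if 0
static void kvm_run_protocol_sketch(int vcpu_fd, struct kvm_run *run)
{
    for (;;) {
        if (ioctl(vcpu_fd, KVM_RUN, 0) == -1 &&
            errno != EINTR && errno != EAGAIN)
            break;                        /* fatal error */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:                 /* emulate the port access ... */
        case KVM_EXIT_MMIO:               /* ... or the memory access */
            break;                        /* then re-enter the guest */
        case KVM_EXIT_HLT:                /* guest idle: wait for an event */
            return;
        default:
            return;                       /* unhandled exit */
        }
    }
}
#endif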

int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq)
{
    struct kvm_interrupt intr;

    intr.irq = irq;
    return ioctl(vcpu->fd, KVM_INTERRUPT, &intr);
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg)
{
    return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg);
}
#endif

int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset) {
        r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL);
        if (r == -1)
            r = -errno;
        return r;
    }
    sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));

    sigmask->len = 8;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask);
    if (r == -1)
        r = -errno;
    free(sigmask);
    return r;
}

int kvm_irqchip_in_kernel(kvm_context_t kvm)
{
    return kvm->irqchip_in_kernel;
}

int kvm_pit_in_kernel(kvm_context_t kvm)
{
    return kvm->pit_in_kernel;
}

int kvm_inject_nmi(kvm_vcpu_context_t vcpu)
{
#ifdef KVM_CAP_USER_NMI
    return ioctl(vcpu->fd, KVM_NMI);
#else
    return -ENOSYS;
#endif
}

int kvm_init_coalesced_mmio(kvm_context_t kvm)
{
    int r = 0;

    kvm_state->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
    if (r > 0) {
        kvm_state->coalesced_mmio = r;
        return 0;
    }
#endif
    return r;
}
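
/*
 * Note: a positive return from KVM_CHECK_EXTENSION(KVM_CAP_COALESCED_MMIO)
 * is the page offset of the coalesced-MMIO ring within the vcpu mmap area,
 * which is why kvm_run() above drains the ring at
 * run + coalesced_mmio * PAGE_SIZE.
 */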

#ifdef KVM_CAP_DEVICE_ASSIGNMENT
int kvm_assign_pci_device(kvm_context_t kvm,
                          struct kvm_assigned_pci_dev *assigned_dev)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
}

static int kvm_old_assign_irq(kvm_context_t kvm,
                              struct kvm_assigned_irq *assigned_irq)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
}

#ifdef KVM_CAP_ASSIGN_DEV_IRQ
int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
    if (ret > 0) {
        return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq);
    }

    return kvm_old_assign_irq(kvm, assigned_irq);
}

int kvm_deassign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
{
    return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
}
#else
int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
{
    return kvm_old_assign_irq(kvm, assigned_irq);
}
#endif
#endif

#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
int kvm_deassign_pci_device(kvm_context_t kvm,
                            struct kvm_assigned_pci_dev *assigned_dev)
{
    return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
}
#endif

int kvm_destroy_memory_region_works(kvm_context_t kvm)
{
    int ret = 0;

#ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
    ret =
        kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
                  KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
    if (ret <= 0)
        ret = 0;
#endif
    return ret;
}

int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
{
#ifdef KVM_CAP_REINJECT_CONTROL
    int r;
    struct kvm_reinject_control control;

    control.pit_reinject = pit_reinject;

    r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
    if (r > 0) {
        return kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control);
    }
#endif
    return -ENOSYS;
}

int kvm_has_gsi_routing(kvm_context_t kvm)
{
    int r = 0;

#ifdef KVM_CAP_IRQ_ROUTING
    r = kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
#endif
    return r;
}

int kvm_get_gsi_count(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
#else
    return -EINVAL;
#endif
}

int kvm_clear_gsi_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes->nr = 0;
    return 0;
#else
    return -EINVAL;
#endif
}

int kvm_add_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing *z;
    struct kvm_irq_routing_entry *new;
    int n, size;

    if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
        n = kvm->nr_allocated_irq_routes * 2;
        if (n < 64)
            n = 64;
        size = sizeof(struct kvm_irq_routing);
        size += n * sizeof(*new);
        z = realloc(kvm->irq_routes, size);
        if (!z)
            return -ENOMEM;
        kvm->nr_allocated_irq_routes = n;
        kvm->irq_routes = z;
    }
    n = kvm->irq_routes->nr++;
    new = &kvm->irq_routes->entries[n];
    memset(new, 0, sizeof(*new));
    new->gsi = entry->gsi;
    new->type = entry->type;
    new->flags = entry->flags;
    new->u = entry->u;

    set_gsi(kvm, entry->gsi);

    return 0;
#else
    return -ENOSYS;
#endif
}

int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_add_routing_entry(kvm, &e);
#else
    return -ENOSYS;
#endif
}

int kvm_del_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry *e, *p;
    int i, gsi, found = 0;

    gsi = entry->gsi;

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        e = &kvm->irq_routes->entries[i];
        if (e->type == entry->type && e->gsi == gsi) {
            switch (e->type) {
            case KVM_IRQ_ROUTING_IRQCHIP:
                if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
                    e->u.irqchip.pin == entry->u.irqchip.pin) {
                    p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
                    *e = *p;
                    found = 1;
                }
                break;
            case KVM_IRQ_ROUTING_MSI:
                if (e->u.msi.address_lo == entry->u.msi.address_lo &&
                    e->u.msi.address_hi == entry->u.msi.address_hi &&
                    e->u.msi.data == entry->u.msi.data) {
                    p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
                    *e = *p;
                    found = 1;
                }
                break;
            default:
                break;
            }
            if (found) {
                /* If there are no other users of this GSI
                 * mark it available in the bitmap */
                for (i = 0; i < kvm->irq_routes->nr; i++) {
                    e = &kvm->irq_routes->entries[i];
                    if (e->gsi == gsi)
                        break;
                }
                if (i == kvm->irq_routes->nr)
                    clear_gsi(kvm, gsi);

                return 0;
            }
        }
    }
    return -ESRCH;
#else
    return -ENOSYS;
#endif
}

int kvm_update_routing_entry(kvm_context_t kvm,
                             struct kvm_irq_routing_entry *entry,
                             struct kvm_irq_routing_entry *newentry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry *e;
    int i;

    if (entry->gsi != newentry->gsi || entry->type != newentry->type) {
        return -EINVAL;
    }

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        e = &kvm->irq_routes->entries[i];
        if (e->type != entry->type || e->gsi != entry->gsi) {
            continue;
        }
        switch (e->type) {
        case KVM_IRQ_ROUTING_IRQCHIP:
            if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
                e->u.irqchip.pin == entry->u.irqchip.pin) {
                memcpy(&e->u.irqchip, &newentry->u.irqchip,
                       sizeof e->u.irqchip);
                return 0;
            }
            break;
        case KVM_IRQ_ROUTING_MSI:
            if (e->u.msi.address_lo == entry->u.msi.address_lo &&
                e->u.msi.address_hi == entry->u.msi.address_hi &&
                e->u.msi.data == entry->u.msi.data) {
                memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
                return 0;
            }
            break;
        default:
            break;
        }
    }
    return -ESRCH;
#else
    return -ENOSYS;
#endif
}

int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_del_routing_entry(kvm, &e);
#else
    return -ENOSYS;
#endif
}

int kvm_commit_irq_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes->flags = 0;
    return kvm_vm_ioctl(kvm_state, KVM_SET_GSI_ROUTING, kvm->irq_routes);
#else
    return -ENOSYS;
#endif
}

int kvm_get_irq_route_gsi(kvm_context_t kvm)
{
    int i, bit;
    uint32_t *buf = kvm->used_gsi_bitmap;

    /* Return the lowest unused GSI in the bitmap */
    for (i = 0; i < kvm->max_gsi / 32; i++) {
        bit = ffs(~buf[i]);
        if (!bit)
            continue;

        return bit - 1 + i * 32;
    }

    return -ENOSPC;
}
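
/*
 * Illustrative sketch (not compiled): the typical find/route/commit sequence
 * built from the helpers above, here for an MSI route.  The MSI address and
 * data values are placeholders.
 */
#if 0
static int msi_route_example(kvm_context_t kvm)
{
    struct kvm_irq_routing_entry e = {
        .type = KVM_IRQ_ROUTING_MSI,
        .flags = 0,
    };
    int gsi = kvm_get_irq_route_gsi(kvm);   /* find the lowest free GSI */

    if (gsi < 0)
        return gsi;
    e.gsi = gsi;
    e.u.msi.address_lo = 0xfee00000;        /* hypothetical MSI address */
    e.u.msi.address_hi = 0;
    e.u.msi.data = 0x4041;                  /* hypothetical MSI data */
    kvm_add_routing_entry(kvm, &e);         /* also marks the GSI used */
    return kvm_commit_irq_routes(kvm);      /* push the table to the kernel */
}
#endif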

#ifdef KVM_CAP_DEVICE_MSIX
int kvm_assign_set_msix_nr(kvm_context_t kvm,
                           struct kvm_assigned_msix_nr *msix_nr)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
}

int kvm_assign_set_msix_entry(kvm_context_t kvm,
                              struct kvm_assigned_msix_entry *entry)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
}
#endif

#if defined(KVM_CAP_IRQFD) && defined(CONFIG_EVENTFD)

#include <sys/eventfd.h>

static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
{
    struct kvm_irqfd data = {
        .fd = fd,
        .gsi = gsi,
        .flags = flags,
    };

    return kvm_vm_ioctl(kvm_state, KVM_IRQFD, &data);
}

int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
{
    int r;
    int fd;

    if (!kvm_check_extension(kvm_state, KVM_CAP_IRQFD))
        return -ENOENT;

    fd = eventfd(0, 0);
    if (fd < 0)
        return -errno;

    r = _kvm_irqfd(kvm, fd, gsi, 0);
    if (r < 0) {
        close(fd);
        return -errno;
    }

    return fd;
}

#else /* KVM_CAP_IRQFD */

int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
{
    return -ENOSYS;
}

#endif /* KVM_CAP_IRQFD */
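
/*
 * Illustrative sketch (not compiled): injecting a guest interrupt through
 * the irqfd returned above.  Writing a non-zero counter value to the
 * eventfd makes the kernel raise the routed GSI without a return to
 * userspace.
 */
#if 0
static void irqfd_example(kvm_context_t kvm, int gsi)
{
    uint64_t counter = 1;
    int fd = kvm_irqfd(kvm, gsi, 0);

    if (fd >= 0)
        write(fd, &counter, sizeof(counter));
}
#endif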

static inline unsigned long kvm_get_thread_id(void)
{
    return syscall(SYS_gettid);
}

static void qemu_cond_wait(pthread_cond_t *cond)
{
    CPUState *env = cpu_single_env;

    pthread_cond_wait(cond, &qemu_mutex);
    cpu_single_env = env;
}

static void sig_ipi_handler(int n)
{
}

static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (env == current_env) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    if (!env->kvm_cpu_state.queued_work_first)
        env->kvm_cpu_state.queued_work_first = &wi;
    else
        env->kvm_cpu_state.queued_work_last->next = &wi;
    env->kvm_cpu_state.queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
    while (!wi.done)
        qemu_cond_wait(&qemu_work_cond);
}
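
/*
 * Illustrative sketch (not compiled): on_vcpu() runs func(data) immediately
 * when already called from the target vcpu's own thread; otherwise it queues
 * the item, kicks the thread with SIG_IPI and blocks on qemu_work_cond until
 * the vcpu thread has run it from flush_queued_work() below.
 */
#if 0
static void example_work(void *data)
{
    /* executes on env's vcpu thread, with qemu_mutex held */
}

static void queue_example(CPUState *env)
{
    on_vcpu(env, example_work, NULL);   /* returns after the work is done */
}
#endif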

static void do_kvm_cpu_synchronize_state(void *_env)
{
    CPUState *env = _env;

    if (!env->kvm_cpu_state.regs_modified) {
        kvm_arch_get_registers(env);
        env->kvm_cpu_state.regs_modified = 1;
    }
}

void kvm_cpu_synchronize_state(CPUState *env)
{
    if (!env->kvm_cpu_state.regs_modified)
        on_vcpu(env, do_kvm_cpu_synchronize_state, env);
}

static void inject_interrupt(void *data)
{
    cpu_interrupt(current_env, (long) data);
}

void kvm_inject_interrupt(CPUState *env, int mask)
{
    on_vcpu(env, inject_interrupt, (void *) (long) mask);
}

void kvm_update_interrupt_request(CPUState *env)
{
    int signal = 0;

    if (env) {
        if (!current_env || !current_env->created)
            signal = 1;
        /*
         * Testing for created here is really redundant
         */
        if (current_env && current_env->created &&
            env != current_env && !env->kvm_cpu_state.signalled)
            signal = 1;

        if (signal) {
            env->kvm_cpu_state.signalled = 1;
            if (env->kvm_cpu_state.thread)
                pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
        }
    }
}

static void kvm_do_load_registers(void *_env)
{
    CPUState *env = _env;

    kvm_arch_load_regs(env);
}

void kvm_load_registers(CPUState *env)
{
    if (kvm_enabled() && qemu_system_ready)
        on_vcpu(env, kvm_do_load_registers, env);
}

static void kvm_do_save_registers(void *_env)
{
    CPUState *env = _env;

    kvm_arch_save_regs(env);
}

void kvm_save_registers(CPUState *env)
{
    if (kvm_enabled())
        on_vcpu(env, kvm_do_save_registers, env);
}

static void kvm_do_load_mpstate(void *_env)
{
    CPUState *env = _env;

    kvm_arch_load_mpstate(env);
}

void kvm_load_mpstate(CPUState *env)
{
    if (kvm_enabled() && qemu_system_ready)
        on_vcpu(env, kvm_do_load_mpstate, env);
}

static void kvm_do_save_mpstate(void *_env)
{
    CPUState *env = _env;

    kvm_arch_save_mpstate(env);
}

void kvm_save_mpstate(CPUState *env)
{
    if (kvm_enabled())
        on_vcpu(env, kvm_do_save_mpstate, env);
}

int kvm_cpu_exec(CPUState *env)
{
    int r;

    r = kvm_run(env->kvm_cpu_state.vcpu_ctx, env);
    if (r < 0) {
        printf("kvm_run returned %d\n", r);
        vm_stop(0);
    }

    return 0;
}

static int is_cpu_stopped(CPUState *env)
{
    return !vm_running || env->stopped;
}

static void flush_queued_work(CPUState *env)
{
    struct qemu_work_item *wi;

    if (!env->kvm_cpu_state.queued_work_first)
        return;

    while ((wi = env->kvm_cpu_state.queued_work_first)) {
        env->kvm_cpu_state.queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
    }
    env->kvm_cpu_state.queued_work_last = NULL;
    pthread_cond_broadcast(&qemu_work_cond);
}

static void kvm_main_loop_wait(CPUState *env, int timeout)
{
    struct timespec ts;
    int r, e;
    siginfo_t siginfo;
    sigset_t waitset;

    pthread_mutex_unlock(&qemu_mutex);

    ts.tv_sec = timeout / 1000;
    ts.tv_nsec = (timeout % 1000) * 1000000;
    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    r = sigtimedwait(&waitset, &siginfo, &ts);
    e = errno;

    pthread_mutex_lock(&qemu_mutex);

    if (r == -1 && !(e == EAGAIN || e == EINTR)) {
        printf("sigtimedwait: %s\n", strerror(e));
        exit(1);
    }

    cpu_single_env = env;
    flush_queued_work(env);

    if (env->stop) {
        env->stop = 0;
        env->stopped = 1;
        pthread_cond_signal(&qemu_pause_cond);
    }

    env->kvm_cpu_state.signalled = 0;
}

static int all_threads_paused(void)
{
    CPUState *penv = first_cpu;

    while (penv) {
        if (penv->stop)
            return 0;
        penv = (CPUState *) penv->next_cpu;
    }

    return 1;
}

static void pause_all_threads(void)
{
    CPUState *penv = first_cpu;

    while (penv) {
        if (penv != cpu_single_env) {
            penv->stop = 1;
            pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
        } else {
            penv->stop = 0;
            penv->stopped = 1;
            cpu_exit(penv);
        }
        penv = (CPUState *) penv->next_cpu;
    }

    while (!all_threads_paused())
        qemu_cond_wait(&qemu_pause_cond);
}

static void resume_all_threads(void)
{
    CPUState *penv = first_cpu;

    assert(!cpu_single_env);

    while (penv) {
        penv->stop = 0;
        penv->stopped = 0;
        pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
        penv = (CPUState *) penv->next_cpu;
    }
}

static void kvm_vm_state_change_handler(void *context, int running, int reason)
{
    if (running)
        resume_all_threads();
    else
        pause_all_threads();
}

static void setup_kernel_sigmask(CPUState *env)
{
    sigset_t set;

    sigemptyset(&set);
    sigaddset(&set, SIGUSR2);
    sigaddset(&set, SIGIO);
    sigaddset(&set, SIGALRM);
    sigprocmask(SIG_BLOCK, &set, NULL);

    sigprocmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    kvm_set_signal_mask(env->kvm_cpu_state.vcpu_ctx, &set);
}

static void qemu_kvm_system_reset(void)
{
    CPUState *penv = first_cpu;

    pause_all_threads();

    qemu_system_reset();

    while (penv) {
        kvm_arch_cpu_reset(penv);
        penv = (CPUState *) penv->next_cpu;
    }

    resume_all_threads();
}

static void process_irqchip_events(CPUState *env)
{
    kvm_arch_process_irqchip_events(env);
    if (kvm_arch_has_work(env))
        env->halted = 0;
}

static int kvm_main_loop_cpu(CPUState *env)
{
    setup_kernel_sigmask(env);

    pthread_mutex_lock(&qemu_mutex);

    kvm_arch_init_vcpu(env);
#ifdef TARGET_I386
    kvm_tpr_vcpu_start(env);
#endif

    cpu_single_env = env;
    kvm_arch_load_regs(env);

    while (1) {
        int run_cpu = !is_cpu_stopped(env);
        if (run_cpu && !kvm_irqchip_in_kernel(kvm_context)) {
            process_irqchip_events(env);
            run_cpu = !env->halted;
        }
        if (run_cpu) {
            kvm_main_loop_wait(env, 0);
            kvm_cpu_exec(env);
        } else {
            kvm_main_loop_wait(env, 1000);
        }
    }
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}

static void *ap_main_loop(void *_env)
{
    CPUState *env = _env;
    sigset_t signals;
#ifdef USE_KVM_DEVICE_ASSIGNMENT
    struct ioperm_data *data = NULL;
#endif

    current_env = env;
    env->thread_id = kvm_get_thread_id();
    sigfillset(&signals);
    sigprocmask(SIG_BLOCK, &signals, NULL);
    env->kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(env, env->cpu_index);

#ifdef USE_KVM_DEVICE_ASSIGNMENT
    /* do ioperm for io ports of assigned devices */
    QLIST_FOREACH(data, &ioperm_head, entries)
        on_vcpu(env, kvm_arch_do_ioperm, data);
#endif

    /* signal VCPU creation */
    pthread_mutex_lock(&qemu_mutex);
    current_env->created = 1;
    pthread_cond_signal(&qemu_vcpu_cond);

    /* and wait for machine initialization */
    while (!qemu_system_ready)
        qemu_cond_wait(&qemu_system_cond);
    pthread_mutex_unlock(&qemu_mutex);

    kvm_main_loop_cpu(env);
    return NULL;
}

void kvm_init_vcpu(CPUState *env)
{
    pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);

    while (env->created == 0)
        qemu_cond_wait(&qemu_vcpu_cond);
}

int kvm_vcpu_inited(CPUState *env)
{
    return env->created;
}

#ifdef TARGET_I386
void kvm_hpet_disable_kpit(void)
{
    struct kvm_pit_state2 ps2;

    kvm_get_pit2(kvm_context, &ps2);
    ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
    kvm_set_pit2(kvm_context, &ps2);
}

void kvm_hpet_enable_kpit(void)
{
    struct kvm_pit_state2 ps2;

    kvm_get_pit2(kvm_context, &ps2);
    ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY;
    kvm_set_pit2(kvm_context, &ps2);
}
#endif

int kvm_init_ap(void)
{
#ifdef TARGET_I386
    kvm_tpr_opt_setup();
#endif
    qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);

    signal(SIG_IPI, sig_ipi_handler);
    return 0;
}

void qemu_kvm_notify_work(void)
{
    uint64_t value = 1;
    char buffer[8];
    size_t offset = 0;

    if (io_thread_fd == -1)
        return;

    memcpy(buffer, &value, sizeof(value));

    while (offset < 8) {
        ssize_t len;

        len = write(io_thread_fd, buffer + offset, 8 - offset);
        if (len == -1 && errno == EINTR)
            continue;

        /* In case we have a pipe, there is no reason to insist on
         * writing all 8 bytes
         */
        if (len == -1 && errno == EAGAIN)
            break;

        if (len <= 0)
            break;

        offset += len;
    }
}

/* If we have signalfd, we mask out the signals we want to handle and then
 * use signalfd to listen for them.  We rely on whatever the current signal
 * handler is to dispatch the signals when we receive them.
 */
static void sigfd_handler(void *opaque)
{
    int fd = (unsigned long) opaque;
    struct qemu_signalfd_siginfo info;
    struct sigaction action;
    ssize_t len;

    while (1) {
        do {
            len = read(fd, &info, sizeof(info));
        } while (len == -1 && errno == EINTR);

        if (len == -1 && errno == EAGAIN)
            break;

        if (len != sizeof(info)) {
            printf("read from sigfd returned %zd: %m\n", len);
            return;
        }

        sigaction(info.ssi_signo, NULL, &action);
        if (action.sa_handler)
            action.sa_handler(info.ssi_signo);
    }
}

/* Used to break IO thread out of select */
static void io_thread_wakeup(void *opaque)
{
    int fd = (unsigned long) opaque;
    char buffer[4096];

    /* Drain the pipe/(eventfd) */
    while (1) {
        ssize_t len;

        len = read(fd, buffer, sizeof(buffer));
        if (len == -1 && errno == EINTR)
            continue;

        if (len <= 0)
            break;
    }
}

int kvm_main_loop(void)
{
    int fds[2];
    sigset_t mask;
    int sigfd;

    io_thread = pthread_self();
    qemu_system_ready = 1;

    if (qemu_eventfd(fds) == -1) {
        fprintf(stderr, "failed to create eventfd\n");
        return -errno;
    }

    fcntl(fds[0], F_SETFL, O_NONBLOCK);
    fcntl(fds[1], F_SETFL, O_NONBLOCK);

    qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
                         (void *)(unsigned long) fds[0]);

    io_thread_fd = fds[1];

    sigemptyset(&mask);
    sigaddset(&mask, SIGIO);
    sigaddset(&mask, SIGALRM);
    sigprocmask(SIG_BLOCK, &mask, NULL);

    sigfd = qemu_signalfd(&mask);
    if (sigfd == -1) {
        fprintf(stderr, "failed to create signalfd\n");
        return -errno;
    }

    fcntl(sigfd, F_SETFL, O_NONBLOCK);

    qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
                         (void *)(unsigned long) sigfd);

    pthread_cond_broadcast(&qemu_system_cond);

    io_thread_sigfd = sigfd;
    cpu_single_env = NULL;

    while (1) {
        main_loop_wait(1000);
        if (qemu_shutdown_requested()) {
            if (qemu_no_shutdown()) {
                vm_stop(0);
            } else
                break;
        } else if (qemu_powerdown_requested())
            qemu_irq_raise(qemu_system_powerdown);
        else if (qemu_reset_requested())
            qemu_kvm_system_reset();
        else if (kvm_debug_cpu_requested) {
            gdb_set_stop_cpu(kvm_debug_cpu_requested);
            vm_stop(EXCP_DEBUG);
            kvm_debug_cpu_requested = NULL;
        }
    }

    pause_all_threads();
    pthread_mutex_unlock(&qemu_mutex);

    return 0;
}

#ifdef TARGET_I386
static int destroy_region_works = 0;
#endif


#if !defined(TARGET_I386)
int kvm_arch_init_irq_routing(void)
{
    return 0;
}
#endif

extern int no_hpet;

static int kvm_create_context(void)
{
    int r;

    if (!kvm_irqchip) {
        kvm_disable_irqchip_creation(kvm_context);
    }
    if (!kvm_pit) {
        kvm_disable_pit_creation(kvm_context);
    }
    if (kvm_create(kvm_context, 0, NULL) < 0) {
        kvm_finalize(kvm_state);
        return -1;
    }
    r = kvm_arch_qemu_create_context();
    if (r < 0)
        kvm_finalize(kvm_state);
    if (kvm_pit && !kvm_pit_reinject) {
        if (kvm_reinject_control(kvm_context, 0)) {
            fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
            return -1;
        }
    }
#ifdef TARGET_I386
    destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
#endif

    r = kvm_arch_init_irq_routing();
    if (r < 0) {
        return r;
    }

    kvm_init_ap();
    if (kvm_irqchip) {
        if (!qemu_kvm_has_gsi_routing()) {
            irq0override = 0;
#ifdef TARGET_I386
            /* If the kernel can't do irq routing, the interrupt source
             * override 0->2 cannot be set up as required by the HPET,
             * so disable the HPET.
             */
            no_hpet = 1;
        } else if (!qemu_kvm_has_pit_state2()) {
            no_hpet = 1;
        }
#else
        }
#endif
    }

    return 0;
}

#ifdef TARGET_I386
static int must_use_aliases_source(target_phys_addr_t addr)
{
    if (destroy_region_works)
        return false;
    if (addr == 0xa0000 || addr == 0xa8000)
        return true;
    return false;
}

static int must_use_aliases_target(target_phys_addr_t addr)
{
    if (destroy_region_works)
        return false;
    if (addr >= 0xe0000000 && addr < 0x100000000ull)
        return true;
    return false;
}

static struct mapping {
    target_phys_addr_t phys;
    ram_addr_t ram;
    ram_addr_t len;
} mappings[50];
static int nr_mappings;

static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
{
    struct mapping *p;

    for (p = mappings; p < mappings + nr_mappings; ++p) {
        if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
            return p;
        }
    }
    return NULL;
}

static struct mapping *find_mapping(target_phys_addr_t start_addr)
{
    struct mapping *p;

    for (p = mappings; p < mappings + nr_mappings; ++p) {
        if (p->phys <= start_addr && start_addr < p->phys + p->len) {
            return p;
        }
    }
    return NULL;
}

static void drop_mapping(target_phys_addr_t start_addr)
{
    struct mapping *p = find_mapping(start_addr);

    if (p)
        *p = mappings[--nr_mappings];
}
#endif

void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
                      ram_addr_t phys_offset)
{
    int r = 0;
    unsigned long area_flags;
#ifdef TARGET_I386
    struct mapping *p;
#endif

    if (start_addr + size > phys_ram_size) {
        phys_ram_size = start_addr + size;
    }

    phys_offset &= ~IO_MEM_ROM;
    area_flags = phys_offset & ~TARGET_PAGE_MASK;

    if (area_flags != IO_MEM_RAM) {
#ifdef TARGET_I386
        if (must_use_aliases_source(start_addr)) {
            kvm_destroy_memory_alias(kvm_context, start_addr);
            return;
        }
        if (must_use_aliases_target(start_addr))
            return;
#endif
        while (size > 0) {
            p = find_mapping(start_addr);
            if (p) {
                kvm_unregister_memory_area(kvm_context, p->phys, p->len);
                drop_mapping(p->phys);
            }
            start_addr += TARGET_PAGE_SIZE;
            if (size > TARGET_PAGE_SIZE) {
                size -= TARGET_PAGE_SIZE;
            } else {
                size = 0;
            }
        }
        return;
    }

    r = kvm_is_containing_region(kvm_context, start_addr, size);
    if (r)
        return;

    if (area_flags >= TLB_MMIO)
        return;

#ifdef TARGET_I386
    if (must_use_aliases_source(start_addr)) {
        p = find_ram_mapping(phys_offset);
        if (p) {
            kvm_create_memory_alias(kvm_context, start_addr, size,
                                    p->phys + (phys_offset - p->ram));
        }
        return;
    }
#endif

    r = kvm_register_phys_mem(kvm_context, start_addr,
                              qemu_get_ram_ptr(phys_offset), size, 0);
    if (r < 0) {
        printf("kvm_cpu_register_physical_memory: failed\n");
        exit(1);
    }

#ifdef TARGET_I386
    drop_mapping(start_addr);
    p = &mappings[nr_mappings++];
    p->phys = start_addr;
    p->ram = phys_offset;
    p->len = size;
#endif

    return;
}

int kvm_setup_guest_memory(void *area, unsigned long size)
{
    int ret = 0;

#ifdef MADV_DONTFORK
    if (kvm_enabled() && !kvm_has_sync_mmu())
        ret = madvise(area, size, MADV_DONTFORK);
#endif

    if (ret)
        perror("madvise");

    return ret;
}

#ifdef KVM_CAP_SET_GUEST_DEBUG

struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg;
    int err;
};

static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *dbg_data = data;

    if (cpu_single_env->kvm_cpu_state.regs_modified) {
        kvm_arch_put_registers(cpu_single_env);
        cpu_single_env->kvm_cpu_state.regs_modified = 0;
    }
    dbg_data->err =
        kvm_set_guest_debug(cpu_single_env->kvm_cpu_state.vcpu_ctx,
                            &dbg_data->dbg);
}

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data data;

    data.dbg.control = 0;
    if (env->singlestep_enabled)
        data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(env, &data.dbg);
    data.dbg.control |= reinject_trap;

    on_vcpu(env, kvm_invoke_set_guest_debug, &data);
    return data.err;
}

#endif

/*
 * dirty pages logging
 */
/* FIXME: use unsigned long pointer instead of unsigned char */
unsigned char *kvm_dirty_bitmap = NULL;
int kvm_physical_memory_set_dirty_tracking(int enable)
{
    int r = 0;

    if (!kvm_enabled())
        return 0;

    if (enable) {
        if (!kvm_dirty_bitmap) {
            unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
            kvm_dirty_bitmap = qemu_malloc(bitmap_size);
            r = kvm_dirty_pages_log_enable_all(kvm_context);
        }
    } else {
        if (kvm_dirty_bitmap) {
            r = kvm_dirty_pages_log_reset(kvm_context);
            qemu_free(kvm_dirty_bitmap);
            kvm_dirty_bitmap = NULL;
        }
    }
    return r;
}

/* get kvm's dirty pages bitmap and update qemu's */
static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
                                         unsigned char *bitmap,
                                         unsigned long offset,
                                         unsigned long mem_size)
{
    unsigned int i, j, n = 0;
    unsigned char c;
    unsigned long page_number, addr, addr1;
    ram_addr_t ram_addr;
    unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + 7) / 8;

    /*
     * bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty.
     */
    for (i = 0; i < len; i++) {
        c = bitmap[i];
        while (c > 0) {
            j = ffsl(c) - 1;
            c &= ~(1u << j);
            page_number = i * 8 + j;
            addr1 = page_number * TARGET_PAGE_SIZE;
            addr = offset + addr1;
            ram_addr = cpu_get_physical_page_desc(addr);
            cpu_physical_memory_set_dirty(ram_addr);
            n++;
        }
    }
    return 0;
}
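
/*
 * Worked example (informative): if bitmap[0] == 0x05 (binary 101), the loop
 * above extracts bits 0 and 2 via ffsl(), i.e. page_number 0 and 2, so only
 * the guest pages at offsets 0 and 2 * TARGET_PAGE_SIZE are marked dirty.
 * All-zero bytes are skipped without touching any page, which is what makes
 * bitmap-traveling cheap when most memory is clean.
 */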

static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                                   void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}

/*
 * get kvm's dirty pages bitmap and update qemu's
 * we only care about physical ram, which resides in slots 0 and 3
 */
int kvm_update_dirty_pages_log(void)
{
    int r = 0;

    r = kvm_get_dirty_pages_range(kvm_context, 0, -1UL, NULL,
                                  kvm_get_dirty_bitmap_cb);
    return r;
}

void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
                         int log)
{
    if (log)
        kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
    else {
#ifdef TARGET_I386
        if (must_use_aliases_target(start))
            return;
#endif
        kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
    }
}

#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq(int irq, int level, int *status)
{
    return kvm_set_irq_level(kvm_context, irq, level, status);
}

#endif

int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
{
    return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
}

void kvm_mutex_unlock(void)
{
    assert(!cpu_single_env);
    pthread_mutex_unlock(&qemu_mutex);
}

void kvm_mutex_lock(void)
{
    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = NULL;
}

#ifdef USE_KVM_DEVICE_ASSIGNMENT
void kvm_add_ioperm_data(struct ioperm_data *data)
{
    QLIST_INSERT_HEAD(&ioperm_head, data, entries);
}

void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
{
    struct ioperm_data *data;

    data = QLIST_FIRST(&ioperm_head);
    while (data) {
        struct ioperm_data *next = QLIST_NEXT(data, entries);

        if (data->start_port == start_port && data->num == num) {
            QLIST_REMOVE(data, entries);
            qemu_free(data);
        }

        data = next;
    }
}

void kvm_ioperm(CPUState *env, void *data)
{
    if (kvm_enabled() && qemu_system_ready)
        on_vcpu(env, kvm_arch_do_ioperm, data);
}

#endif

int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                   target_phys_addr_t end_addr)
{
#ifndef TARGET_IA64

#ifdef TARGET_I386
    if (must_use_aliases_source(start_addr))
        return 0;
#endif

    kvm_get_dirty_pages_range(kvm_context, start_addr,
                              end_addr - start_addr, NULL,
                              kvm_get_dirty_bitmap_cb);
#endif
    return 0;
}

int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
{
#ifdef TARGET_I386
    if (must_use_aliases_source(phys_addr))
        return 0;
#endif

#ifndef TARGET_IA64
    kvm_qemu_log_memory(phys_addr, len, 1);
#endif
    return 0;
}

int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
{
#ifdef TARGET_I386
    if (must_use_aliases_source(phys_addr))
        return 0;
#endif

#ifndef TARGET_IA64
    kvm_qemu_log_memory(phys_addr, len, 0);
#endif
    return 0;
}

int kvm_set_boot_cpu_id(uint32_t id)
{
    return kvm_set_boot_vcpu_id(kvm_context, id);
}

#ifdef TARGET_I386
#ifdef KVM_CAP_MCE
struct kvm_x86_mce_data {
    CPUState *env;
    struct kvm_x86_mce *mce;
};

static void kvm_do_inject_x86_mce(void *_data)
{
    struct kvm_x86_mce_data *data = _data;
    int r;

    r = kvm_set_mce(data->env->kvm_cpu_state.vcpu_ctx, data->mce);
    if (r < 0)
        perror("kvm_set_mce FAILED");
}
#endif

void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
                        uint64_t mcg_status, uint64_t addr, uint64_t misc)
{
#ifdef KVM_CAP_MCE
    struct kvm_x86_mce mce = {
        .bank = bank,
        .status = status,
        .mcg_status = mcg_status,
        .addr = addr,
        .misc = misc,
    };
    struct kvm_x86_mce_data data = {
        .env = cenv,
        .mce = &mce,
    };

    on_vcpu(cenv, kvm_do_inject_x86_mce, &data);
#endif
}
#endif