/*
 * qemu/kvm integration
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 *
 * Licensed under the terms of the GNU GPL version 2 or higher.
 */

#include "config.h"
#include "config-host.h"

#include <assert.h>
#include <string.h>
#include "hw/hw.h"
#include "sysemu.h"
#include "qemu-common.h"
#include "console.h"
#include "block.h"
#include "compatfd.h"
#include "gdbstub.h"
#include "monitor.h"

#include "qemu-kvm.h"
#include "libkvm.h"

#include <pthread.h>
#include <sys/utsname.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/prctl.h>

#define false 0
#define true 1

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef BUS_MCEERR_AR
#define BUS_MCEERR_AR 4
#endif
#ifndef BUS_MCEERR_AO
#define BUS_MCEERR_AO 5
#endif

#define EXPECTED_KVM_API_VERSION 12

#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
#error libkvm: userspace and kernel version mismatch
#endif

int kvm_allowed = 1;
int kvm_irqchip = 1;
int kvm_pit = 1;
int kvm_pit_reinject = 1;
int kvm_nested = 0;

KVMState *kvm_state;
kvm_context_t kvm_context;

pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
__thread CPUState *current_env;

static int qemu_system_ready;

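/* SIG_IPI is used with pthread_kill() to kick vcpu threads out of
 * KVM_RUN (see on_vcpu() and kvm_update_interrupt_request() below); a
 * realtime signal keeps it clear of the standard signals QEMU already
 * handles. */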
#define SIG_IPI (SIGRTMIN+4)

pthread_t io_thread;
static int io_thread_fd = -1;
static int io_thread_sigfd = -1;

static CPUState *kvm_debug_cpu_requested;

static uint64_t phys_ram_size;

#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
/* The list of ioperm_data */
static QLIST_HEAD(, ioperm_data) ioperm_head;
#endif

//#define DEBUG_MEMREG
#ifdef DEBUG_MEMREG
#define DPRINTF(fmt, args...) \
    do { fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ##args); } while (0)
#else
#define DPRINTF(fmt, args...) do {} while (0)
#endif

#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))

int kvm_abi = EXPECTED_KVM_API_VERSION;
int kvm_page_size;

#ifdef KVM_CAP_SET_GUEST_DEBUG
static int kvm_debug(CPUState *env,
                     struct kvm_debug_exit_arch *arch_info)
{
    int handle = kvm_arch_debug(arch_info);

    if (handle) {
        kvm_debug_cpu_requested = env;
        env->stopped = 1;
    }
    return handle;
}
#endif

static int handle_unhandled(uint64_t reason)
{
    fprintf(stderr, "kvm: unhandled exit %" PRIx64 "\n", reason);
    return -EINVAL;
}

static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
{
    uint32_t *bitmap = kvm->used_gsi_bitmap;

    if (gsi < kvm->max_gsi)
        bitmap[gsi / 32] |= 1U << (gsi % 32);
    else
        DPRINTF("Invalid GSI %u\n", gsi);
}

static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
{
    uint32_t *bitmap = kvm->used_gsi_bitmap;

    if (gsi < kvm->max_gsi)
        bitmap[gsi / 32] &= ~(1U << (gsi % 32));
    else
        DPRINTF("Invalid GSI %u\n", gsi);
}

struct slot_info {
    unsigned long phys_addr;
    unsigned long len;
    unsigned long userspace_addr;
    unsigned flags;
    int logging_count;
};

struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];

static void init_slots(void)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        slots[i].len = 0;
}

static int get_free_slot(kvm_context_t kvm)
{
    int i;
    int tss_ext;

#if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
    tss_ext = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
#else
    tss_ext = 0;
#endif

    /*
     * On older kernels where the set tss ioctl is not supported we must save
     * slot 0 to hold the extended memory, as the vmx will use the last 3
     * pages of this slot.
     */
    if (tss_ext > 0)
        i = 0;
    else
        i = 1;

    for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        if (!slots[i].len)
            return i;
    return -1;
}

static void register_slot(int slot, unsigned long phys_addr,
                          unsigned long len, unsigned long userspace_addr,
                          unsigned flags)
{
    slots[slot].phys_addr = phys_addr;
    slots[slot].len = len;
    slots[slot].userspace_addr = userspace_addr;
    slots[slot].flags = flags;
}

static void free_slot(int slot)
{
    slots[slot].len = 0;
    slots[slot].logging_count = 0;
}

static int get_slot(unsigned long phys_addr)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
        if (slots[i].len && slots[i].phys_addr <= phys_addr &&
            (slots[i].phys_addr + slots[i].len - 1) >= phys_addr)
            return i;
    }
    return -1;
}

/* Returns the number of a slot that fully contains the given range,
 * or -1 if the range is not totally contained in any slot */
static int get_container_slot(uint64_t phys_addr, unsigned long size)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        if (slots[i].len && slots[i].phys_addr <= phys_addr &&
            (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
            return i;
    return -1;
}

int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr,
                             unsigned long size)
{
    int slot = get_container_slot(phys_addr, size);
    if (slot == -1)
        return 0;
    return 1;
}

/*
 * dirty pages logging control
 */

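/* Dirty logging is toggled per memory slot by rewriting the slot's
 * KVM_MEM_LOG_DIRTY_PAGES flag via KVM_SET_USER_MEMORY_REGION;
 * kvm_dirty_pages_log_change() applies (flags, mask) to the cached
 * slot flags and only issues the ioctl when something changed. */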
static int kvm_dirty_pages_log_change(kvm_context_t kvm,
                                      unsigned long phys_addr, unsigned flags,
                                      unsigned mask)
{
    int r = -1;
    int slot = get_slot(phys_addr);

    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
        return 1;
    }

    flags = (slots[slot].flags & ~mask) | flags;
    if (flags == slots[slot].flags)
        return 0;
    slots[slot].flags = flags;

    {
        struct kvm_userspace_memory_region mem = {
            .slot = slot,
            .memory_size = slots[slot].len,
            .guest_phys_addr = slots[slot].phys_addr,
            .userspace_addr = slots[slot].userspace_addr,
            .flags = slots[slot].flags,
        };

        DPRINTF("slot %d start %llx len %llx flags %x\n",
                mem.slot, mem.guest_phys_addr, mem.memory_size, mem.flags);
        r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &mem);
        if (r < 0)
            fprintf(stderr, "%s: %m\n", __FUNCTION__);
    }
    return r;
}

static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
                                          int (*change)(kvm_context_t kvm,
                                                        uint64_t start,
                                                        uint64_t len))
{
    int i, r;

    for (i = r = 0; i < KVM_MAX_NUM_MEM_REGIONS && r == 0; i++) {
        if (slots[i].len)
            r = change(kvm, slots[i].phys_addr, slots[i].len);
    }
    return r;
}

int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm, uint64_t phys_addr,
                                    uint64_t len)
{
    int slot = get_slot(phys_addr);

    DPRINTF("start %" PRIx64 " len %" PRIx64 "\n", phys_addr, len);
    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return -EINVAL;
    }

    if (slots[slot].logging_count++)
        return 0;

    return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm, uint64_t phys_addr,
                                     uint64_t len)
{
    int slot = get_slot(phys_addr);

    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return -EINVAL;
    }

    if (--slots[slot].logging_count)
        return 0;

    return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr, 0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

/**
 * Enable dirty page logging for all memory regions
 */
int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
{
    if (kvm->dirty_pages_log_all)
        return 0;
    kvm->dirty_pages_log_all = 1;
    return kvm_dirty_pages_log_change_all(kvm, kvm_dirty_pages_log_enable_slot);
}

/**
 * Enable dirty page logging only for memory regions that were created with
 * dirty logging enabled (disable for all other memory regions).
 */
int kvm_dirty_pages_log_reset(kvm_context_t kvm)
{
    if (!kvm->dirty_pages_log_all)
        return 0;
    kvm->dirty_pages_log_all = 0;
    return kvm_dirty_pages_log_change_all(kvm,
                                          kvm_dirty_pages_log_disable_slot);
}

static int kvm_create_context(void);

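/* Open /dev/kvm and verify that kernel and userspace agree on the KVM
 * API version (EXPECTED_KVM_API_VERSION, 12) before allocating the
 * global kvm_state/kvm_context pair and the GSI usage bitmap; actual
 * VM creation is deferred to kvm_create_context(). */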
int kvm_init(int smp_cpus)
{
    int fd;
    int r, gsi_count;

    fd = open("/dev/kvm", O_RDWR);
    if (fd == -1) {
        perror("open /dev/kvm");
        return -1;
    }
    r = ioctl(fd, KVM_GET_API_VERSION, 0);
    if (r == -1) {
        fprintf(stderr,
                "kvm kernel version too old: "
                "KVM_GET_API_VERSION ioctl not supported\n");
        goto out_close;
    }
    if (r < EXPECTED_KVM_API_VERSION) {
        fprintf(stderr, "kvm kernel version too old: "
                "We expect API version %d or newer, but got "
                "version %d\n", EXPECTED_KVM_API_VERSION, r);
        goto out_close;
    }
    if (r > EXPECTED_KVM_API_VERSION) {
        fprintf(stderr, "kvm userspace version too old\n");
        goto out_close;
    }
    kvm_abi = r;
    kvm_page_size = getpagesize();
    kvm_state = qemu_mallocz(sizeof(*kvm_state));
    kvm_context = &kvm_state->kvm_context;

    kvm_state->fd = fd;
    kvm_state->vmfd = -1;
    kvm_context->opaque = cpu_single_env;
    kvm_context->dirty_pages_log_all = 0;
    kvm_context->no_irqchip_creation = 0;
    kvm_context->no_pit_creation = 0;

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&kvm_state->kvm_sw_breakpoints);
#endif

    gsi_count = kvm_get_gsi_count(kvm_context);
    if (gsi_count > 0) {
        int gsi_bits, i;

        /* Round up so we can search ints using ffs */
        gsi_bits = ALIGN(gsi_count, 32);
        kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
        kvm_context->max_gsi = gsi_bits;

        /* Mark any over-allocated bits as already in use */
        for (i = gsi_count; i < gsi_bits; i++)
            set_gsi(kvm_context, i);
    }

    kvm_cpu_register_phys_memory_client();

    pthread_mutex_lock(&qemu_mutex);
    return kvm_create_context();

out_close:
    close(fd);
    return -1;
}

static void kvm_finalize(KVMState *s)
{
    /* FIXME
      if (kvm->vcpu_fd[0] != -1)
          close(kvm->vcpu_fd[0]);
      if (kvm->vm_fd != -1)
          close(kvm->vm_fd);
    */
    close(s->fd);
    free(s);
}

void kvm_disable_irqchip_creation(kvm_context_t kvm)
{
    kvm->no_irqchip_creation = 1;
}

void kvm_disable_pit_creation(kvm_context_t kvm)
{
    kvm->no_pit_creation = 1;
}

static void kvm_reset_vcpu(void *opaque)
{
    CPUState *env = opaque;

    kvm_arch_cpu_reset(env);
}

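/* A vcpu is a file descriptor obtained via KVM_CREATE_VCPU; its shared
 * communication area (struct kvm_run) is mmapped from that fd.
 * KVM_GET_VCPU_MMAP_SIZE can exceed one page, e.g. when the coalesced
 * MMIO ring is appended after the kvm_run structure. */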
static void kvm_create_vcpu(CPUState *env, int id)
{
    long mmap_size;
    int r;
    KVMState *s = kvm_state;

    r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
    if (r < 0) {
        fprintf(stderr, "kvm_create_vcpu: %m\n");
        fprintf(stderr, "Failed to create vCPU. Check the -smp parameter.\n");
        goto err;
    }

    env->kvm_fd = r;
    env->kvm_state = kvm_state;

    mmap_size = kvm_ioctl(kvm_state, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        fprintf(stderr, "get vcpu mmap size: %m\n");
        goto err_fd;
    }
    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        fprintf(stderr, "mmap vcpu area: %m\n");
        goto err_fd;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    if (s->coalesced_mmio && !s->coalesced_mmio_ring)
        s->coalesced_mmio_ring = (void *) env->kvm_run +
            s->coalesced_mmio * PAGE_SIZE;
#endif

    r = kvm_arch_init_vcpu(env);
    if (r == 0) {
        qemu_register_reset(kvm_reset_vcpu, env);
    }

    return;
err_fd:
    close(env->kvm_fd);
err:
    /* We're no good with semi-broken states. */
    abort();
}

static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
{
#ifdef KVM_CAP_SET_BOOT_CPU_ID
    int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
    if (r > 0)
        return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id);
    return -ENOSYS;
#else
    return -ENOSYS;
#endif
}

int kvm_create_vm(kvm_context_t kvm)
{
    int fd;
#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes = qemu_mallocz(sizeof(*kvm->irq_routes));
    kvm->nr_allocated_irq_routes = 0;
#endif

    fd = kvm_ioctl(kvm_state, KVM_CREATE_VM, 0);
    if (fd < 0) {
        fprintf(stderr, "kvm_create_vm: %m\n");
        return -1;
    }
    kvm_state->vmfd = fd;
    return 0;
}

static int kvm_create_default_phys_mem(kvm_context_t kvm,
                                       unsigned long phys_mem_bytes,
                                       void **vm_mem)
{
#ifdef KVM_CAP_USER_MEMORY
    int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
    if (r > 0)
        return 0;
    fprintf(stderr,
            "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
#else
#error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
#endif
    return -1;
}

void kvm_create_irqchip(kvm_context_t kvm)
{
    int r;

    kvm->irqchip_in_kernel = 0;
#ifdef KVM_CAP_IRQCHIP
    if (!kvm->no_irqchip_creation) {
        r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
        if (r > 0) {            /* kernel irqchip supported */
            r = kvm_vm_ioctl(kvm_state, KVM_CREATE_IRQCHIP);
            if (r >= 0) {
                kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
#if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
                r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
                              KVM_CAP_IRQ_INJECT_STATUS);
                if (r > 0)
                    kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
#endif
                kvm->irqchip_in_kernel = 1;
            } else
                fprintf(stderr, "Create kernel PIC irqchip failed\n");
        }
    }
#endif
    kvm_state->irqchip_in_kernel = kvm->irqchip_in_kernel;
}

int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
{
    int r;

    r = kvm_create_vm(kvm);
    if (r < 0)
        return r;
    r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
    if (r < 0)
        return r;
    init_slots();
    r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
    if (r < 0)
        return r;
    kvm_create_irqchip(kvm);

    return 0;
}

int kvm_register_phys_mem(kvm_context_t kvm,
                          unsigned long phys_start, void *userspace_addr,
                          unsigned long len, int log)
{

    struct kvm_userspace_memory_region memory = {
        .memory_size = len,
        .guest_phys_addr = phys_start,
        .userspace_addr = (unsigned long) (uintptr_t) userspace_addr,
        .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
    };
    int r;

    memory.slot = get_free_slot(kvm);
    DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %x\n",
            memory.guest_phys_addr, memory.memory_size, memory.userspace_addr,
            memory.slot, memory.flags);
    r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r < 0) {
        fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(-r));
        return -1;
    }
    register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
                  memory.userspace_addr, memory.flags);
    return 0;
}

/* destroy/free a whole slot.
 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
 */
void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len)
{
    int slot;
    int r;
    struct kvm_userspace_memory_region memory = {
        .memory_size = 0,
        .guest_phys_addr = phys_start,
        .userspace_addr = 0,
        .flags = 0,
    };

    slot = get_slot(phys_start);

    if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
        fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n", __FUNCTION__,
                slot);
        return;
    }
    if (phys_start != slots[slot].phys_addr) {
        fprintf(stderr,
                "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
                __FUNCTION__, phys_start, slots[slot].phys_addr);
        phys_start = slots[slot].phys_addr;
    }

    memory.slot = slot;
    DPRINTF("slot %d start %llx len %llx flags %x\n",
            memory.slot, memory.guest_phys_addr, memory.memory_size,
            memory.flags);
    r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r < 0) {
        fprintf(stderr, "destroy_userspace_phys_mem: %s", strerror(-r));
        return;
    }

    free_slot(memory.slot);
}

void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr,
                                unsigned long size)
{

    int slot = get_container_slot(phys_addr, size);

    if (slot != -1) {
        DPRINTF("Unregistering memory region %" PRIx64 " (%lx)\n", phys_addr, size);
        kvm_destroy_phys_mem(kvm, phys_addr, size);
        return;
    }
}

static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
{
    int r;
    struct kvm_dirty_log log = {
        .slot = slot,
    };

    log.dirty_bitmap = buf;

    r = kvm_vm_ioctl(kvm_state, ioctl_num, &log);
    if (r < 0)
        return r;
    return 0;
}

int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
{
    int slot;

    slot = get_slot(phys_addr);
    return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
}

int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
                              unsigned long len, void *opaque,
                              int (*cb)(unsigned long start,
                                        unsigned long len, void *bitmap,
                                        void *opaque))
{
    int i;
    int r;
    unsigned long end_addr = phys_addr + len;
    void *buf;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
        if ((slots[i].len && (uint64_t) slots[i].phys_addr >= phys_addr)
            && ((uint64_t) slots[i].phys_addr + slots[i].len <= end_addr)) {
            buf = qemu_malloc(BITMAP_SIZE(slots[i].len));
            r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
            if (r) {
                qemu_free(buf);
                return r;
            }
            r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
            qemu_free(buf);
            if (r)
                return r;
        }
    }
    return 0;
}

#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
{
    struct kvm_irq_level event;
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    event.level = level;
    event.irq = irq;
    r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event);
    if (r < 0)
        perror("kvm_set_irq_level");

    if (status) {
#ifdef KVM_CAP_IRQ_INJECT_STATUS
        *status =
            (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
#else
        *status = 1;
#endif
    }

    return 1;
}

int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
    if (r < 0) {
        perror("kvm_get_irqchip");
    }
    return r;
}

int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
    if (r < 0) {
        perror("kvm_set_irqchip");
    }
    return r;
}

#endif

static int handle_debug(CPUState *env)
{
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_run *run = env->kvm_run;

    return kvm_debug(env, &run->debug.arch);
#else
    return 0;
#endif
}

int kvm_get_regs(CPUState *env, struct kvm_regs *regs)
{
    return kvm_vcpu_ioctl(env, KVM_GET_REGS, regs);
}

int kvm_set_regs(CPUState *env, struct kvm_regs *regs)
{
    return kvm_vcpu_ioctl(env, KVM_SET_REGS, regs);
}

int kvm_get_fpu(CPUState *env, struct kvm_fpu *fpu)
{
    return kvm_vcpu_ioctl(env, KVM_GET_FPU, fpu);
}

int kvm_set_fpu(CPUState *env, struct kvm_fpu *fpu)
{
    return kvm_vcpu_ioctl(env, KVM_SET_FPU, fpu);
}

int kvm_get_sregs(CPUState *env, struct kvm_sregs *sregs)
{
    return kvm_vcpu_ioctl(env, KVM_GET_SREGS, sregs);
}

int kvm_set_sregs(CPUState *env, struct kvm_sregs *sregs)
{
    return kvm_vcpu_ioctl(env, KVM_SET_SREGS, sregs);
}

#ifdef KVM_CAP_MP_STATE
int kvm_get_mpstate(CPUState *env, struct kvm_mp_state *mp_state)
{
    int r;

    r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
    if (r > 0)
        return kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, mp_state);
    return -ENOSYS;
}

int kvm_set_mpstate(CPUState *env, struct kvm_mp_state *mp_state)
{
    int r;

    r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
    if (r > 0)
        return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, mp_state);
    return -ENOSYS;
}
#endif

static int handle_mmio(CPUState *env)
{
    unsigned long addr = env->kvm_run->mmio.phys_addr;
    struct kvm_run *kvm_run = env->kvm_run;
    void *data = kvm_run->mmio.data;

    /* hack: Red Hat 7.1 generates these weird accesses. */
    if ((addr > 0xa0000 - 4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
        return 0;

    cpu_physical_memory_rw(addr, data, kvm_run->mmio.len, kvm_run->mmio.is_write);
    return 0;
}

int handle_io_window(kvm_context_t kvm)
{
    return 1;
}

int handle_shutdown(kvm_context_t kvm, CPUState *env)
{
    /* stop the current vcpu from going back to guest mode */
    env->stopped = 1;

    qemu_system_reset_request();
    return 1;
}

static inline void push_nmi(kvm_context_t kvm)
{
#ifdef KVM_CAP_USER_NMI
    kvm_arch_push_nmi(kvm->opaque);
#endif /* KVM_CAP_USER_NMI */
}

void post_kvm_run(kvm_context_t kvm, CPUState *env)
{
    pthread_mutex_lock(&qemu_mutex);
    kvm_arch_post_run(env, env->kvm_run);
    cpu_single_env = env;
}

int pre_kvm_run(kvm_context_t kvm, CPUState *env)
{
    kvm_arch_pre_run(env, env->kvm_run);

    if (env->kvm_vcpu_dirty) {
        kvm_arch_load_regs(env, KVM_PUT_RUNTIME_STATE);
        env->kvm_vcpu_dirty = 0;
    }

    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}

int kvm_is_ready_for_interrupt_injection(CPUState *env)
{
    return env->kvm_run->ready_for_interrupt_injection;
}

static int kvm_handle_internal_error(kvm_context_t kvm,
                                     CPUState *env,
                                     struct kvm_run *run)
{
    fprintf(stderr, "KVM internal error. Suberror: %d\n",
            run->internal.suberror);
#ifdef KVM_CAP_INTERNAL_ERROR_DATA
    if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
        int i;

        for (i = 0; i < run->internal.ndata; ++i) {
            fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
                    i, (uint64_t)run->internal.data[i]);
        }
    }
#endif
    kvm_show_regs(env);
    if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION)
        fprintf(stderr, "emulation failure\n");
    vm_stop(0);
    return 1;
}

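/* Main vcpu execution loop: interrupts are pushed while qemu_mutex is
 * held (pre_kvm_run), the mutex is dropped across the blocking KVM_RUN
 * ioctl and reacquired in post_kvm_run(), then the exit reason is
 * dispatched below. A zero return from a handler loops straight back
 * into the guest. */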
int kvm_run(CPUState *env)
{
    int r;
    kvm_context_t kvm = &env->kvm_state->kvm_context;
    struct kvm_run *run = env->kvm_run;
    int fd = env->kvm_fd;

again:
    push_nmi(kvm);
#if !defined(__s390__)
    if (!kvm->irqchip_in_kernel)
        run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
#endif

    r = pre_kvm_run(kvm, env);
    if (r)
        return r;
    r = ioctl(fd, KVM_RUN, 0);

    if (r == -1 && errno != EINTR && errno != EAGAIN) {
        r = -errno;
        post_kvm_run(kvm, env);
        fprintf(stderr, "kvm_run: %s\n", strerror(-r));
        return r;
    }

    post_kvm_run(kvm, env);

    kvm_flush_coalesced_mmio_buffer();

#if !defined(__s390__)
    if (r == -1) {
        r = handle_io_window(kvm);
        goto more;
    }
#endif
    if (1) {
        switch (run->exit_reason) {
        case KVM_EXIT_UNKNOWN:
            r = handle_unhandled(run->hw.hardware_exit_reason);
            break;
        case KVM_EXIT_FAIL_ENTRY:
            r = handle_unhandled(run->fail_entry.hardware_entry_failure_reason);
            break;
        case KVM_EXIT_EXCEPTION:
            fprintf(stderr, "exception %d (%x)\n", run->ex.exception,
                    run->ex.error_code);
            kvm_show_regs(env);
            kvm_show_code(env);
            abort();
            break;
        case KVM_EXIT_IO:
            r = kvm_handle_io(run->io.port,
                              (uint8_t *)run + run->io.data_offset,
                              run->io.direction,
                              run->io.size,
                              run->io.count);
            r = 0;
            break;
        case KVM_EXIT_DEBUG:
            r = handle_debug(env);
            break;
        case KVM_EXIT_MMIO:
            r = handle_mmio(env);
            break;
        case KVM_EXIT_HLT:
            r = kvm_arch_halt(env);
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            break;
        case KVM_EXIT_SHUTDOWN:
            r = handle_shutdown(kvm, env);
            break;
#if defined(__s390__)
        case KVM_EXIT_S390_SIEIC:
            r = kvm_s390_handle_intercept(kvm, env, run);
            break;
        case KVM_EXIT_S390_RESET:
            r = kvm_s390_handle_reset(kvm, env, run);
            break;
#endif
        case KVM_EXIT_INTERNAL_ERROR:
            r = kvm_handle_internal_error(kvm, env, run);
            break;
        default:
            if (kvm_arch_run(env)) {
                fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason);
                kvm_show_regs(env);
                abort();
            }
            break;
        }
    }
more:
    if (!r)
        goto again;
    return r;
}

int kvm_inject_irq(CPUState *env, unsigned irq)
{
    struct kvm_interrupt intr;

    intr.irq = irq;
    return kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
}

int kvm_inject_nmi(CPUState *env)
{
#ifdef KVM_CAP_USER_NMI
    return kvm_vcpu_ioctl(env, KVM_NMI);
#else
    return -ENOSYS;
#endif
}

int kvm_init_coalesced_mmio(kvm_context_t kvm)
{
    int r = 0;
    kvm_state->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
    if (r > 0) {
        kvm_state->coalesced_mmio = r;
        return 0;
    }
#endif
    return r;
}

#ifdef KVM_CAP_DEVICE_ASSIGNMENT
int kvm_assign_pci_device(kvm_context_t kvm,
                          struct kvm_assigned_pci_dev *assigned_dev)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
}

static int kvm_old_assign_irq(kvm_context_t kvm,
                              struct kvm_assigned_irq *assigned_irq)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
}

#ifdef KVM_CAP_ASSIGN_DEV_IRQ
int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
    if (ret > 0) {
        return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq);
    }

    return kvm_old_assign_irq(kvm, assigned_irq);
}

int kvm_deassign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
{
    return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
}
#else
int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
{
    return kvm_old_assign_irq(kvm, assigned_irq);
}
#endif
#endif

#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
int kvm_deassign_pci_device(kvm_context_t kvm,
                            struct kvm_assigned_pci_dev *assigned_dev)
{
    return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
}
#endif

int kvm_destroy_memory_region_works(kvm_context_t kvm)
{
    int ret = 0;

#ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
    ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
                    KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
    if (ret <= 0)
        ret = 0;
#endif
    return ret;
}

int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
{
#ifdef KVM_CAP_REINJECT_CONTROL
    int r;
    struct kvm_reinject_control control;

    control.pit_reinject = pit_reinject;

    r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
    if (r > 0) {
        return kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control);
    }
#endif
    return -ENOSYS;
}

int kvm_has_gsi_routing(kvm_context_t kvm)
{
    int r = 0;

#ifdef KVM_CAP_IRQ_ROUTING
    r = kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
#endif
    return r;
}

int kvm_get_gsi_count(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
#else
    return -EINVAL;
#endif
}

int kvm_clear_gsi_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes->nr = 0;
    return 0;
#else
    return -EINVAL;
#endif
}

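/* The routing table handed to KVM_SET_GSI_ROUTING lives in a growable
 * heap array: it doubles on overflow with a floor of 64 entries, and
 * set_gsi()/clear_gsi() track which GSIs are in use so that
 * kvm_get_irq_route_gsi() can hand out free ones. */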
int kvm_add_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing *z;
    struct kvm_irq_routing_entry *new;
    int n, size;

    if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
        n = kvm->nr_allocated_irq_routes * 2;
        if (n < 64)
            n = 64;
        size = sizeof(struct kvm_irq_routing);
        size += n * sizeof(*new);
        z = realloc(kvm->irq_routes, size);
        if (!z)
            return -ENOMEM;
        kvm->nr_allocated_irq_routes = n;
        kvm->irq_routes = z;
    }
    n = kvm->irq_routes->nr++;
    new = &kvm->irq_routes->entries[n];
    memset(new, 0, sizeof(*new));
    new->gsi = entry->gsi;
    new->type = entry->type;
    new->flags = entry->flags;
    new->u = entry->u;

    set_gsi(kvm, entry->gsi);

    return 0;
#else
    return -ENOSYS;
#endif
}

int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_add_routing_entry(kvm, &e);
#else
    return -ENOSYS;
#endif
}

int kvm_del_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry *e, *p;
    int i, gsi, found = 0;

    gsi = entry->gsi;

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        e = &kvm->irq_routes->entries[i];
        if (e->type == entry->type && e->gsi == gsi) {
            switch (e->type) {
            case KVM_IRQ_ROUTING_IRQCHIP:{
                    if (e->u.irqchip.irqchip ==
                        entry->u.irqchip.irqchip
                        && e->u.irqchip.pin == entry->u.irqchip.pin) {
                        p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
                        *e = *p;
                        found = 1;
                    }
                    break;
                }
            case KVM_IRQ_ROUTING_MSI:{
                    if (e->u.msi.address_lo ==
                        entry->u.msi.address_lo
                        && e->u.msi.address_hi ==
                        entry->u.msi.address_hi
                        && e->u.msi.data == entry->u.msi.data) {
                        p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
                        *e = *p;
                        found = 1;
                    }
                    break;
                }
            default:
                break;
            }
            if (found) {
                /* If there are no other users of this GSI
                 * mark it available in the bitmap */
                for (i = 0; i < kvm->irq_routes->nr; i++) {
                    e = &kvm->irq_routes->entries[i];
                    if (e->gsi == gsi)
                        break;
                }
                if (i == kvm->irq_routes->nr)
                    clear_gsi(kvm, gsi);

                return 0;
            }
        }
    }
    return -ESRCH;
#else
    return -ENOSYS;
#endif
}

int kvm_update_routing_entry(kvm_context_t kvm,
                             struct kvm_irq_routing_entry *entry,
                             struct kvm_irq_routing_entry *newentry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry *e;
    int i;

    if (entry->gsi != newentry->gsi || entry->type != newentry->type) {
        return -EINVAL;
    }

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        e = &kvm->irq_routes->entries[i];
        if (e->type != entry->type || e->gsi != entry->gsi) {
            continue;
        }
        switch (e->type) {
        case KVM_IRQ_ROUTING_IRQCHIP:
            if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
                e->u.irqchip.pin == entry->u.irqchip.pin) {
                memcpy(&e->u.irqchip, &newentry->u.irqchip,
                       sizeof e->u.irqchip);
                return 0;
            }
            break;
        case KVM_IRQ_ROUTING_MSI:
            if (e->u.msi.address_lo == entry->u.msi.address_lo &&
                e->u.msi.address_hi == entry->u.msi.address_hi &&
                e->u.msi.data == entry->u.msi.data) {
                memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
                return 0;
            }
            break;
        default:
            break;
        }
    }
    return -ESRCH;
#else
    return -ENOSYS;
#endif
}

int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_del_routing_entry(kvm, &e);
#else
    return -ENOSYS;
#endif
}

int kvm_commit_irq_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes->flags = 0;
    return kvm_vm_ioctl(kvm_state, KVM_SET_GSI_ROUTING, kvm->irq_routes);
#else
    return -ENOSYS;
#endif
}

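/* ffs() on the inverted bitmap word yields the lowest clear bit, i.e.
 * the lowest free GSI; the bitmap was rounded up to a multiple of 32
 * bits in kvm_init(), with the padding bits pre-marked as in use. */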
int kvm_get_irq_route_gsi(kvm_context_t kvm)
{
    int i, bit;
    uint32_t *buf = kvm->used_gsi_bitmap;

    /* Return the lowest unused GSI in the bitmap */
    for (i = 0; i < kvm->max_gsi / 32; i++) {
        bit = ffs(~buf[i]);
        if (!bit)
            continue;

        return bit - 1 + i * 32;
    }

    return -ENOSPC;
}

#ifdef KVM_CAP_DEVICE_MSIX
int kvm_assign_set_msix_nr(kvm_context_t kvm,
                           struct kvm_assigned_msix_nr *msix_nr)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
}

int kvm_assign_set_msix_entry(kvm_context_t kvm,
                              struct kvm_assigned_msix_entry *entry)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
}
#endif

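/* irqfd attaches an eventfd to a GSI so the kernel injects that
 * interrupt on any 8-byte write to the fd, with no trip through
 * userspace. A minimal usage sketch (assuming `gsi` is already
 * routed):
 *
 *     int fd = kvm_irqfd(kvm_context, gsi, 0);
 *     uint64_t one = 1;
 *     if (fd >= 0)
 *         write(fd, &one, sizeof(one));   // raises the GSI
 */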
#if defined(KVM_CAP_IRQFD) && defined(CONFIG_EVENTFD)

#include <sys/eventfd.h>

static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
{
    struct kvm_irqfd data = {
        .fd = fd,
        .gsi = gsi,
        .flags = flags,
    };

    return kvm_vm_ioctl(kvm_state, KVM_IRQFD, &data);
}

int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
{
    int r;
    int fd;

    if (!kvm_check_extension(kvm_state, KVM_CAP_IRQFD))
        return -ENOENT;

    fd = eventfd(0, 0);
    if (fd < 0)
        return -errno;

    r = _kvm_irqfd(kvm, fd, gsi, 0);
    if (r < 0) {
        close(fd);
        return -errno;
    }

    return fd;
}

#else /* KVM_CAP_IRQFD */

int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
{
    return -ENOSYS;
}

#endif /* KVM_CAP_IRQFD */

unsigned long kvm_get_thread_id(void)
{
    return syscall(SYS_gettid);
}

static void qemu_cond_wait(pthread_cond_t *cond)
{
    CPUState *env = cpu_single_env;

    pthread_cond_wait(cond, &qemu_mutex);
    cpu_single_env = env;
}

static void sig_ipi_handler(int n)
{
}

static void hardware_memory_error(void)
{
    fprintf(stderr, "Hardware memory error!\n");
    exit(1);
}

static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
#if defined(KVM_CAP_MCE) && defined(TARGET_I386)
    if (first_cpu->mcg_cap && siginfo->ssi_addr
        && siginfo->ssi_code == BUS_MCEERR_AO) {
        uint64_t status;
        unsigned long paddr;
        CPUState *cenv;

        /* Hope we are lucky for AO MCE */
        if (do_qemu_ram_addr_from_host((void *)(intptr_t)siginfo->ssi_addr,
                                       &paddr)) {
            fprintf(stderr, "Hardware memory error for memory used by "
                    "QEMU itself instead of guest system!: %llx\n",
                    (unsigned long long)siginfo->ssi_addr);
            return;
        }
        status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
            | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
            | 0xc0;
        kvm_inject_x86_mce(first_cpu, 9, status,
                           MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr,
                           (MCM_ADDR_PHYS << 6) | 0xc, 1);
        for (cenv = first_cpu->next_cpu; cenv != NULL; cenv = cenv->next_cpu)
            kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
                               MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1);
    } else
#endif
    {
        if (siginfo->ssi_code == BUS_MCEERR_AO)
            return;
        else if (siginfo->ssi_code == BUS_MCEERR_AR)
            hardware_memory_error();
        else
            sigbus_reraise();
    }
}

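/* on_vcpu() runs func(data) on the target vcpu's own thread: the work
 * item is queued on that vcpu, the thread is kicked with SIG_IPI, and
 * the caller sleeps on qemu_work_cond until flush_queued_work() marks
 * the item done. From the target thread itself the function is simply
 * called directly; see do_kvm_cpu_synchronize_state() below for a
 * typical caller. */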
static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (env == current_env) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    if (!env->kvm_cpu_state.queued_work_first)
        env->kvm_cpu_state.queued_work_first = &wi;
    else
        env->kvm_cpu_state.queued_work_last->next = &wi;
    env->kvm_cpu_state.queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
    while (!wi.done)
        qemu_cond_wait(&qemu_work_cond);
}

static void do_kvm_cpu_synchronize_state(void *_env)
{
    CPUState *env = _env;

    if (!env->kvm_vcpu_dirty) {
        kvm_arch_save_regs(env);
        env->kvm_vcpu_dirty = 1;
    }
}

void kvm_cpu_synchronize_state(CPUState *env)
{
    if (!env->kvm_vcpu_dirty)
        on_vcpu(env, do_kvm_cpu_synchronize_state, env);
}

void kvm_cpu_synchronize_post_reset(CPUState *env)
{
    kvm_arch_load_regs(env, KVM_PUT_RESET_STATE);
    env->kvm_vcpu_dirty = 0;
}

void kvm_cpu_synchronize_post_init(CPUState *env)
{
    kvm_arch_load_regs(env, KVM_PUT_FULL_STATE);
    env->kvm_vcpu_dirty = 0;
}

static void inject_interrupt(void *data)
{
    cpu_interrupt(current_env, (long) data);
}

void kvm_inject_interrupt(CPUState *env, int mask)
{
    on_vcpu(env, inject_interrupt, (void *) (long) mask);
}

void kvm_update_interrupt_request(CPUState *env)
{
    int signal = 0;

    if (env) {
        if (!current_env || !current_env->created)
            signal = 1;
        /*
         * Testing for created here is really redundant
         */
        if (current_env && current_env->created &&
            env != current_env && !env->kvm_cpu_state.signalled)
            signal = 1;

        if (signal) {
            env->kvm_cpu_state.signalled = 1;
            if (env->kvm_cpu_state.thread)
                pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
        }
    }
}

int kvm_cpu_exec(CPUState *env)
{
    int r;

    r = kvm_run(env);
    if (r < 0) {
        printf("kvm_run returned %d\n", r);
        vm_stop(0);
    }

    return 0;
}

int kvm_cpu_is_stopped(CPUState *env)
{
    return !vm_running || env->stopped;
}

static void flush_queued_work(CPUState *env)
{
    struct qemu_work_item *wi;

    if (!env->kvm_cpu_state.queued_work_first)
        return;

    while ((wi = env->kvm_cpu_state.queued_work_first)) {
        env->kvm_cpu_state.queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
    }
    env->kvm_cpu_state.queued_work_last = NULL;
    pthread_cond_broadcast(&qemu_work_cond);
}

static void kvm_on_sigbus(CPUState *env, siginfo_t *siginfo)
{
#if defined(KVM_CAP_MCE) && defined(TARGET_I386)
    struct kvm_x86_mce mce = {
        .bank = 9,
    };
    unsigned long paddr;
    int r;

    if (env->mcg_cap && siginfo->si_addr
        && (siginfo->si_code == BUS_MCEERR_AR
            || siginfo->si_code == BUS_MCEERR_AO)) {
        if (siginfo->si_code == BUS_MCEERR_AR) {
            /* Fake an Intel architectural Data Load SRAR UCR */
            mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
                | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
                | MCI_STATUS_AR | 0x134;
            mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
            mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
        } else {
            /* Fake an Intel architectural Memory scrubbing UCR */
            mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
                | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
                | 0xc0;
            mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
            mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
        }
        if (do_qemu_ram_addr_from_host((void *)siginfo->si_addr, &paddr)) {
            fprintf(stderr, "Hardware memory error for memory used by "
                    "QEMU itself instead of guest system!\n");
            /* Hope we are lucky for AO MCE */
            if (siginfo->si_code == BUS_MCEERR_AO)
                return;
            else
                hardware_memory_error();
        }
        mce.addr = paddr;
        r = kvm_set_mce(env, &mce);
        if (r < 0) {
            fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
            abort();
        }
    } else
#endif
    {
        if (siginfo->si_code == BUS_MCEERR_AO)
            return;
        else if (siginfo->si_code == BUS_MCEERR_AR)
            hardware_memory_error();
        else
            sigbus_reraise();
    }
}

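/* Wait for SIG_IPI/SIGBUS with qemu_mutex dropped, then drain all
 * pending instances before returning so that a kick cannot be lost
 * between sigtimedwait() and the next KVM_RUN. */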
static void kvm_main_loop_wait(CPUState *env, int timeout)
{
    struct timespec ts;
    int r, e;
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;

    ts.tv_sec = timeout / 1000;
    ts.tv_nsec = (timeout % 1000) * 1000000;
    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        pthread_mutex_unlock(&qemu_mutex);

        r = sigtimedwait(&waitset, &siginfo, &ts);
        e = errno;

        pthread_mutex_lock(&qemu_mutex);

        if (r == -1 && !(e == EAGAIN || e == EINTR)) {
            printf("sigtimedwait: %s\n", strerror(e));
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            kvm_on_sigbus(env, &siginfo);
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            printf("sigpending: %s\n", strerror(errno));
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));

    cpu_single_env = env;
    flush_queued_work(env);

    if (env->stop) {
        env->stop = 0;
        env->stopped = 1;
        pthread_cond_signal(&qemu_pause_cond);
    }

    env->kvm_cpu_state.signalled = 0;
}

static int all_threads_paused(void)
{
    CPUState *penv = first_cpu;

    while (penv) {
        if (penv->stop)
            return 0;
        penv = (CPUState *) penv->next_cpu;
    }

    return 1;
}

static void pause_all_threads(void)
{
    CPUState *penv = first_cpu;

    while (penv) {
        if (penv != cpu_single_env) {
            penv->stop = 1;
            pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
        } else {
            penv->stop = 0;
            penv->stopped = 1;
            cpu_exit(penv);
        }
        penv = (CPUState *) penv->next_cpu;
    }

    while (!all_threads_paused())
        qemu_cond_wait(&qemu_pause_cond);
}

static void resume_all_threads(void)
{
    CPUState *penv = first_cpu;

    assert(!cpu_single_env);

    while (penv) {
        penv->stop = 0;
        penv->stopped = 0;
        pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
        penv = (CPUState *) penv->next_cpu;
    }
}

static void kvm_vm_state_change_handler(void *context, int running, int reason)
{
    if (running)
        resume_all_threads();
    else
        pause_all_threads();
}

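/* Per-vcpu signal setup: SIGUSR2/SIGIO/SIGALRM stay blocked in vcpu
 * threads (the io-thread handles them via signalfd), while SIG_IPI and
 * SIGBUS are excluded from the mask passed to KVM_SET_SIGNAL_MASK, so
 * they are deliverable only inside KVM_RUN, where they kick the vcpu
 * out of guest mode. */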
static void setup_kernel_sigmask(CPUState *env)
{
    sigset_t set;

    sigemptyset(&set);
    sigaddset(&set, SIGUSR2);
    sigaddset(&set, SIGIO);
    sigaddset(&set, SIGALRM);
    sigprocmask(SIG_BLOCK, &set, NULL);

    sigprocmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);

    kvm_set_signal_mask(env, &set);
}

static void qemu_kvm_system_reset(void)
{
    pause_all_threads();

    qemu_system_reset();

    resume_all_threads();
}

static void process_irqchip_events(CPUState *env)
{
    kvm_arch_process_irqchip_events(env);
    if (kvm_arch_has_work(env))
        env->halted = 0;
}

static int kvm_main_loop_cpu(CPUState *env)
{
    while (1) {
        int run_cpu = !kvm_cpu_is_stopped(env);
        if (run_cpu && !kvm_irqchip_in_kernel()) {
            process_irqchip_events(env);
            run_cpu = !env->halted;
        }
        if (run_cpu) {
            kvm_cpu_exec(env);
            kvm_main_loop_wait(env, 0);
        } else {
            kvm_main_loop_wait(env, 1000);
        }
    }
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}

static void *ap_main_loop(void *_env)
{
    CPUState *env = _env;
    sigset_t signals;
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
    struct ioperm_data *data = NULL;
#endif

    current_env = env;
    env->thread_id = kvm_get_thread_id();
    sigfillset(&signals);
    sigprocmask(SIG_BLOCK, &signals, NULL);

#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
    /* do ioperm for io ports of assigned devices */
    QLIST_FOREACH(data, &ioperm_head, entries)
        on_vcpu(env, kvm_arch_do_ioperm, data);
#endif

    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = env;

    kvm_create_vcpu(env, env->cpu_index);
    setup_kernel_sigmask(env);

    /* signal VCPU creation */
    current_env->created = 1;
    pthread_cond_signal(&qemu_vcpu_cond);

    /* and wait for machine initialization */
    while (!qemu_system_ready)
        qemu_cond_wait(&qemu_system_cond);

    /* re-initialize cpu_single_env after re-acquiring qemu_mutex */
    cpu_single_env = env;

    kvm_main_loop_cpu(env);
    return NULL;
}

int kvm_init_vcpu(CPUState *env)
{
    pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);

    while (env->created == 0)
        qemu_cond_wait(&qemu_vcpu_cond);

    return 0;
}

int kvm_vcpu_inited(CPUState *env)
{
    return env->created;
}

#ifdef TARGET_I386
void kvm_hpet_disable_kpit(void)
{
    struct kvm_pit_state2 ps2;

    kvm_get_pit2(kvm_context, &ps2);
    ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
    kvm_set_pit2(kvm_context, &ps2);
}

void kvm_hpet_enable_kpit(void)
{
    struct kvm_pit_state2 ps2;

    kvm_get_pit2(kvm_context, &ps2);
    ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY;
    kvm_set_pit2(kvm_context, &ps2);
}
#endif

int kvm_init_ap(void)
{
    struct sigaction action;

    qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);

    signal(SIG_IPI, sig_ipi_handler);

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);
    prctl(PR_MCE_KILL, 1, 1, 0, 0);
    return 0;
}

void qemu_kvm_notify_work(void)
{
    /* Write 8 bytes to be compatible with eventfd. */
    static uint64_t val = 1;
    ssize_t ret;

    if (io_thread_fd == -1)
        return;

    do {
        ret = write(io_thread_fd, &val, sizeof(val));
    } while (ret < 0 && errno == EINTR);

    /* EAGAIN is fine in case we have a pipe. */
    if (ret < 0 && errno != EAGAIN) {
        fprintf(stderr, "qemu_kvm_notify_work: write() failed: %s\n",
                strerror(errno));
        exit(1);
    }
}

/* If we have signalfd, we mask out the signals we want to handle and then
 * use signalfd to listen for them. We rely on whatever the current signal
 * handler is to dispatch the signals when we receive them.
 */
static void sigfd_handler(void *opaque)
{
    int fd = (unsigned long) opaque;
    struct qemu_signalfd_siginfo info;
    struct sigaction action;
    ssize_t len;

    while (1) {
        do {
            len = read(fd, &info, sizeof(info));
        } while (len == -1 && errno == EINTR);

        if (len == -1 && errno == EAGAIN)
            break;

        if (len != sizeof(info)) {
            printf("read from sigfd returned %zd: %m\n", len);
            return;
        }

        sigaction(info.ssi_signo, NULL, &action);
        if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction)
            action.sa_sigaction(info.ssi_signo,
                                (siginfo_t *)&info, NULL);
        else if (action.sa_handler)
            action.sa_handler(info.ssi_signo);
    }
}

/* Used to break IO thread out of select */
static void io_thread_wakeup(void *opaque)
{
    int fd = (unsigned long) opaque;
    ssize_t len;
    char buffer[512];

    /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */
    do {
        len = read(fd, buffer, sizeof(buffer));
    } while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
}

int kvm_main_loop(void)
{
    int fds[2];
    sigset_t mask;
    int sigfd;

    io_thread = pthread_self();
    qemu_system_ready = 1;

    if (qemu_eventfd(fds) == -1) {
        fprintf(stderr, "failed to create eventfd\n");
        return -errno;
    }

    fcntl(fds[0], F_SETFL, O_NONBLOCK);
    fcntl(fds[1], F_SETFL, O_NONBLOCK);

    qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
                         (void *)(unsigned long) fds[0]);

    io_thread_fd = fds[1];

    sigemptyset(&mask);
    sigaddset(&mask, SIGIO);
    sigaddset(&mask, SIGALRM);
    sigaddset(&mask, SIGBUS);
    sigprocmask(SIG_BLOCK, &mask, NULL);

    sigfd = qemu_signalfd(&mask);
    if (sigfd == -1) {
        fprintf(stderr, "failed to create signalfd\n");
        return -errno;
    }

    fcntl(sigfd, F_SETFL, O_NONBLOCK);

    qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
                         (void *)(unsigned long) sigfd);

    pthread_cond_broadcast(&qemu_system_cond);

    io_thread_sigfd = sigfd;
    cpu_single_env = NULL;

    while (1) {
        main_loop_wait(1000);
        if (qemu_shutdown_requested()) {
            monitor_protocol_event(QEVENT_SHUTDOWN, NULL);
            if (qemu_no_shutdown()) {
                vm_stop(0);
            } else
                break;
        } else if (qemu_powerdown_requested()) {
            monitor_protocol_event(QEVENT_POWERDOWN, NULL);
            qemu_irq_raise(qemu_system_powerdown);
        } else if (qemu_reset_requested()) {
            qemu_kvm_system_reset();
        } else if (kvm_debug_cpu_requested) {
            gdb_set_stop_cpu(kvm_debug_cpu_requested);
            vm_stop(EXCP_DEBUG);
            kvm_debug_cpu_requested = NULL;
        }
    }

    pause_all_threads();
    pthread_mutex_unlock(&qemu_mutex);

    return 0;
}

#ifdef TARGET_I386
static int destroy_region_works = 0;
#endif

#if !defined(TARGET_I386)
int kvm_arch_init_irq_routing(void)
{
    return 0;
}
#endif

extern int no_hpet;

static int kvm_create_context(void)
{
    int r;

    if (!kvm_irqchip) {
        kvm_disable_irqchip_creation(kvm_context);
    }
    if (!kvm_pit) {
        kvm_disable_pit_creation(kvm_context);
    }
    if (kvm_create(kvm_context, 0, NULL) < 0) {
        kvm_finalize(kvm_state);
        return -1;
    }
    r = kvm_arch_qemu_create_context();
    if (r < 0) {
        kvm_finalize(kvm_state);
        return -1;
    }
    if (kvm_pit && !kvm_pit_reinject) {
        if (kvm_reinject_control(kvm_context, 0)) {
            fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
            return -1;
        }
    }
#ifdef TARGET_I386
    destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
#endif

    r = kvm_arch_init_irq_routing();
    if (r < 0) {
        return r;
    }

    kvm_state->vcpu_events = 0;
#ifdef KVM_CAP_VCPU_EVENTS
    kvm_state->vcpu_events = kvm_check_extension(kvm_state, KVM_CAP_VCPU_EVENTS);
#endif

    kvm_state->debugregs = 0;
#ifdef KVM_CAP_DEBUGREGS
    kvm_state->debugregs = kvm_check_extension(kvm_state, KVM_CAP_DEBUGREGS);
#endif

    kvm_init_ap();
    if (kvm_irqchip) {
        if (!qemu_kvm_has_gsi_routing()) {
            irq0override = 0;
#ifdef TARGET_I386
            /* If the kernel can't do irq routing, the interrupt source
             * override 0->2 cannot be set up as required by the HPET,
             * so disable the HPET.
             */
            no_hpet = 1;
        } else if (!qemu_kvm_has_pit_state2()) {
            no_hpet = 1;
        }
#else
        }
#endif
    }

    return 0;
}

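/* Workaround for kernels that cannot destroy memory slots
 * (!destroy_region_works): the VGA windows at 0xa0000/0xa8000 are
 * implemented as aliases of their backing RAM rather than independent
 * slots, and the small mappings[] table below remembers phys->ram
 * associations so aliases can be recreated on remap. */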
#ifdef TARGET_I386
static int must_use_aliases_source(target_phys_addr_t addr)
{
    if (destroy_region_works)
        return false;
    if (addr == 0xa0000 || addr == 0xa8000)
        return true;
    return false;
}

static int must_use_aliases_target(target_phys_addr_t addr)
{
    if (destroy_region_works)
        return false;
    if (addr >= 0xe0000000 && addr < 0x100000000ull)
        return true;
    return false;
}

static struct mapping {
    target_phys_addr_t phys;
    ram_addr_t ram;
    ram_addr_t len;
} mappings[50];
static int nr_mappings;

static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
{
    struct mapping *p;

    for (p = mappings; p < mappings + nr_mappings; ++p) {
        if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
            return p;
        }
    }
    return NULL;
}

static struct mapping *find_mapping(target_phys_addr_t start_addr)
{
    struct mapping *p;

    for (p = mappings; p < mappings + nr_mappings; ++p) {
        if (p->phys <= start_addr && start_addr < p->phys + p->len) {
            return p;
        }
    }
    return NULL;
}

static void drop_mapping(target_phys_addr_t start_addr)
{
    struct mapping *p = find_mapping(start_addr);

    if (p)
        *p = mappings[--nr_mappings];
}
#endif

void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
                      ram_addr_t phys_offset)
{
    int r = 0;
    unsigned long area_flags;
#ifdef TARGET_I386
    struct mapping *p;
#endif

    if (start_addr + size > phys_ram_size) {
        phys_ram_size = start_addr + size;
    }

    phys_offset &= ~IO_MEM_ROM;
    area_flags = phys_offset & ~TARGET_PAGE_MASK;

    if (area_flags != IO_MEM_RAM) {
#ifdef TARGET_I386
        if (must_use_aliases_source(start_addr)) {
            kvm_destroy_memory_alias(kvm_context, start_addr);
            return;
        }
        if (must_use_aliases_target(start_addr))
            return;
#endif
        while (size > 0) {
            p = find_mapping(start_addr);
            if (p) {
                kvm_unregister_memory_area(kvm_context, p->phys, p->len);
                drop_mapping(p->phys);
            }
            start_addr += TARGET_PAGE_SIZE;
            if (size > TARGET_PAGE_SIZE) {
                size -= TARGET_PAGE_SIZE;
            } else {
                size = 0;
            }
        }
        return;
    }

    r = kvm_is_containing_region(kvm_context, start_addr, size);
    if (r)
        return;

    if (area_flags >= TLB_MMIO)
        return;

#ifdef TARGET_I386
    if (must_use_aliases_source(start_addr)) {
        p = find_ram_mapping(phys_offset);
        if (p) {
            kvm_create_memory_alias(kvm_context, start_addr, size,
                                    p->phys + (phys_offset - p->ram));
        }
        return;
    }
#endif

    r = kvm_register_phys_mem(kvm_context, start_addr,
                              qemu_get_ram_ptr(phys_offset), size, 0);
    if (r < 0) {
        printf("kvm_cpu_register_physical_memory: failed\n");
        exit(1);
    }

#ifdef TARGET_I386
    drop_mapping(start_addr);
    p = &mappings[nr_mappings++];
    p->phys = start_addr;
    p->ram = phys_offset;
    p->len = size;
#endif

    return;
}

/*
 * dirty pages logging
 */
/* FIXME: use unsigned long pointer instead of unsigned char */
unsigned char *kvm_dirty_bitmap = NULL;
int kvm_physical_memory_set_dirty_tracking(int enable)
{
    int r = 0;

    if (!kvm_enabled())
        return 0;

    if (enable) {
        if (!kvm_dirty_bitmap) {
            unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
            kvm_dirty_bitmap = qemu_malloc(bitmap_size);
            r = kvm_dirty_pages_log_enable_all(kvm_context);
        }
    } else {
        if (kvm_dirty_bitmap) {
            r = kvm_dirty_pages_log_reset(kvm_context);
            qemu_free(kvm_dirty_bitmap);
            kvm_dirty_bitmap = NULL;
        }
    }
    return r;
}

/* get kvm's dirty pages bitmap and update qemu's */
static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
                                         unsigned long *bitmap,
                                         unsigned long offset,
                                         unsigned long mem_size)
{
    unsigned int i, j;
    unsigned long page_number, addr, addr1, c;
    ram_addr_t ram_addr;
    unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + HOST_LONG_BITS - 1) /
        HOST_LONG_BITS;

    /*
     * bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty.
     */
    for (i = 0; i < len; i++) {
        if (bitmap[i] != 0) {
            c = leul_to_cpu(bitmap[i]);
            do {
                j = ffsl(c) - 1;
                c &= ~(1ul << j);
                page_number = i * HOST_LONG_BITS + j;
                addr1 = page_number * TARGET_PAGE_SIZE;
                addr = offset + addr1;
                ram_addr = cpu_get_physical_page_desc(addr);
                cpu_physical_memory_set_dirty(ram_addr);
            } while (c != 0);
        }
    }
    return 0;
}

static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                                   void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}

void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
                         int log)
{
    if (log)
        kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
    else {
#ifdef TARGET_I386
        if (must_use_aliases_target(start))
            return;
#endif
        kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
    }
}

#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq(int irq, int level, int *status)
{
    return kvm_set_irq_level(kvm_context, irq, level, status);
}

#endif

void kvm_mutex_unlock(void)
{
    assert(!cpu_single_env);
    pthread_mutex_unlock(&qemu_mutex);
}

void kvm_mutex_lock(void)
{
    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = NULL;
}

void qemu_mutex_unlock_iothread(void)
{
    if (kvm_enabled())
        kvm_mutex_unlock();
}

void qemu_mutex_lock_iothread(void)
{
    if (kvm_enabled())
        kvm_mutex_lock();
}

#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
void kvm_add_ioperm_data(struct ioperm_data *data)
{
    QLIST_INSERT_HEAD(&ioperm_head, data, entries);
}

void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
{
    struct ioperm_data *data;

    data = QLIST_FIRST(&ioperm_head);
    while (data) {
        struct ioperm_data *next = QLIST_NEXT(data, entries);

        if (data->start_port == start_port && data->num == num) {
            QLIST_REMOVE(data, entries);
            qemu_free(data);
        }

        data = next;
    }
}

void kvm_ioperm(CPUState *env, void *data)
{
    if (kvm_enabled() && qemu_system_ready)
        on_vcpu(env, kvm_arch_do_ioperm, data);
}

#endif

int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                   target_phys_addr_t end_addr)
{
#ifndef TARGET_IA64

#ifdef TARGET_I386
    if (must_use_aliases_source(start_addr))
        return 0;
#endif

    kvm_get_dirty_pages_range(kvm_context, start_addr,
                              end_addr - start_addr, NULL,
                              kvm_get_dirty_bitmap_cb);
#endif
    return 0;
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t len)
{
#ifdef TARGET_I386
    if (must_use_aliases_source(phys_addr))
        return 0;
#endif

#ifndef TARGET_IA64
    kvm_qemu_log_memory(phys_addr, len, 1);
#endif
    return 0;
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t len)
{
#ifdef TARGET_I386
    if (must_use_aliases_source(phys_addr))
        return 0;
#endif

#ifndef TARGET_IA64
    kvm_qemu_log_memory(phys_addr, len, 0);
#endif
    return 0;
}

int kvm_set_boot_cpu_id(uint32_t id)
{
    return kvm_set_boot_vcpu_id(kvm_context, id);
}

#ifdef TARGET_I386
#ifdef KVM_CAP_MCE
struct kvm_x86_mce_data {
    CPUState *env;
    struct kvm_x86_mce *mce;
    int abort_on_error;
};

static void kvm_do_inject_x86_mce(void *_data)
{
    struct kvm_x86_mce_data *data = _data;
    int r;

    r = kvm_set_mce(data->env, data->mce);
    if (r < 0) {
        perror("kvm_set_mce FAILED");
        if (data->abort_on_error)
            abort();
    }
}
#endif

void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
                        uint64_t mcg_status, uint64_t addr, uint64_t misc,
                        int abort_on_error)
{
#ifdef KVM_CAP_MCE
    struct kvm_x86_mce mce = {
        .bank = bank,
        .status = status,
        .mcg_status = mcg_status,
        .addr = addr,
        .misc = misc,
    };
    struct kvm_x86_mce_data data = {
        .env = cenv,
        .mce = &mce,
        .abort_on_error = abort_on_error,
    };

    if (!cenv->mcg_cap) {
        fprintf(stderr, "MCE support is not enabled!\n");
        return;
    }
    on_vcpu(cenv, kvm_do_inject_x86_mce, &data);
#else
    if (abort_on_error)
        abort();
#endif
}
#endif