/*
 * qemu-kvm: Replace kvm_show_regs/code with cpu_dump_state
 * [qemu-kvm.git] / qemu-kvm.c
 * blob 55de9710858175017d3668fdcf90142b43d7cdfb
 */
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
20 #include "monitor.h"
21 #include "cpus.h"
22 #include "qemu-thread.h"
24 #include "qemu-kvm.h"
26 #include <pthread.h>
27 #include <sys/utsname.h>
28 #include <sys/syscall.h>
29 #include <sys/mman.h>
30 #include <sys/ioctl.h>
31 #include "compatfd.h"
32 #include <sys/prctl.h>
34 #ifndef PR_MCE_KILL
35 #define PR_MCE_KILL 33
36 #endif
38 #ifndef BUS_MCEERR_AR
39 #define BUS_MCEERR_AR 4
40 #endif
41 #ifndef BUS_MCEERR_AO
42 #define BUS_MCEERR_AO 5
43 #endif
45 #define EXPECTED_KVM_API_VERSION 12
47 #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
48 #error libkvm: userspace and kernel version mismatch
49 #endif
51 int kvm_irqchip = 1;
52 int kvm_pit = 1;
53 int kvm_pit_reinject = 1;
54 int kvm_nested = 0;
57 KVMState *kvm_state;
58 kvm_context_t kvm_context;
60 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
61 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
62 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
63 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
64 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
65 __thread CPUState *current_env;
67 static int qemu_system_ready;
69 #define SIG_IPI (SIGRTMIN+4)
71 CPUState *kvm_debug_cpu_requested;
73 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
74 /* The list of ioperm_data */
75 static QLIST_HEAD(, ioperm_data) ioperm_head;
76 #endif
78 #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
/* Report a VM exit that no other handler claimed.
 * @reason: raw hardware exit reason from the kernel.
 * Returns -EINVAL so the run loop stops the vcpu. */
static int handle_unhandled(uint64_t reason)
{
    fprintf(stderr, "kvm: unhandled exit %" PRIx64 "\n", reason);
    return -EINVAL;
}
#define VMX_INVALID_GUEST_STATE 0x80000021

/* Diagnose a KVM_EXIT_FAIL_ENTRY exit.
 * @reason: hardware entry failure reason reported by the kernel.
 * Prints extra guidance for the common VMX invalid-guest-state case.
 * Always returns -EINVAL (entry failure is fatal for the vcpu). */
static int handle_failed_vmentry(uint64_t reason)
{
    fprintf(stderr, "kvm: vm entry failed with error 0x%" PRIx64 "\n\n", reason);

    /* Perhaps we will need to check if this machine is intel since exit reason 0x21
       has a different interpretation on SVM */
    if (reason == VMX_INVALID_GUEST_STATE) {
        /* Typo fixes vs. original: "runnning" -> "running",
         * "maybe running" -> "may be running". */
        fprintf(stderr, "If you're running a guest on an Intel machine without\n");
        fprintf(stderr, "unrestricted mode support, the failure can be most likely\n");
        fprintf(stderr, "due to the guest entering an invalid state for Intel VT.\n");
        fprintf(stderr, "For example, the guest may be running in big real mode\n");
        fprintf(stderr, "which is not supported on less recent Intel processors.\n\n");
    }
    return -EINVAL;
}
105 static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
107 uint32_t *bitmap = kvm->used_gsi_bitmap;
109 if (gsi < kvm->max_gsi)
110 bitmap[gsi / 32] |= 1U << (gsi % 32);
111 else
112 DPRINTF("Invalid GSI %u\n", gsi);
115 static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
117 uint32_t *bitmap = kvm->used_gsi_bitmap;
119 if (gsi < kvm->max_gsi)
120 bitmap[gsi / 32] &= ~(1U << (gsi % 32));
121 else
122 DPRINTF("Invalid GSI %u\n", gsi);
125 static int kvm_create_context(void);
127 int kvm_init(void)
129 int fd;
130 int r, gsi_count, i;
133 fd = open("/dev/kvm", O_RDWR);
134 if (fd == -1) {
135 perror("open /dev/kvm");
136 return -1;
138 r = ioctl(fd, KVM_GET_API_VERSION, 0);
139 if (r == -1) {
140 fprintf(stderr,
141 "kvm kernel version too old: "
142 "KVM_GET_API_VERSION ioctl not supported\n");
143 goto out_close;
145 if (r < EXPECTED_KVM_API_VERSION) {
146 fprintf(stderr, "kvm kernel version too old: "
147 "We expect API version %d or newer, but got "
148 "version %d\n", EXPECTED_KVM_API_VERSION, r);
149 goto out_close;
151 if (r > EXPECTED_KVM_API_VERSION) {
152 fprintf(stderr, "kvm userspace version too old\n");
153 goto out_close;
155 kvm_state = qemu_mallocz(sizeof(*kvm_state));
156 kvm_context = &kvm_state->kvm_context;
158 kvm_state->fd = fd;
159 kvm_state->vmfd = -1;
161 #ifdef KVM_CAP_SET_GUEST_DEBUG
162 QTAILQ_INIT(&kvm_state->kvm_sw_breakpoints);
163 #endif
165 for (i = 0; i < ARRAY_SIZE(kvm_state->slots); i++) {
166 kvm_state->slots[i].slot = i;
169 #ifdef KVM_CAP_USER_MEMORY
170 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
171 if (r <= 0) {
172 fprintf(stderr,
173 "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
174 goto out_close;
176 #else
177 #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
178 #endif
180 gsi_count = kvm_get_gsi_count(kvm_context);
181 if (gsi_count > 0) {
182 int gsi_bits, i;
184 /* Round up so we can search ints using ffs */
185 gsi_bits = ALIGN(gsi_count, 32);
186 kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
187 kvm_context->max_gsi = gsi_bits;
189 /* Mark any over-allocated bits as already in use */
190 for (i = gsi_count; i < gsi_bits; i++) {
191 set_gsi(kvm_context, i);
195 kvm_cpu_register_phys_memory_client();
197 pthread_mutex_lock(&qemu_mutex);
198 return kvm_create_context();
200 out_close:
201 close(fd);
202 return -1;
205 static void kvm_finalize(KVMState *s)
207 /* FIXME
208 if (kvm->vcpu_fd[0] != -1)
209 close(kvm->vcpu_fd[0]);
210 if (kvm->vm_fd != -1)
211 close(kvm->vm_fd);
213 close(s->fd);
214 free(s);
217 static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
219 #ifdef KVM_CAP_SET_BOOT_CPU_ID
220 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
221 if (r > 0) {
222 return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id);
224 return -ENOSYS;
225 #else
226 return -ENOSYS;
227 #endif
230 void kvm_create_irqchip(kvm_context_t kvm)
232 int r;
234 #ifdef KVM_CAP_IRQCHIP
235 if (kvm_irqchip) {
236 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
237 if (r > 0) { /* kernel irqchip supported */
238 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_IRQCHIP);
239 if (r >= 0) {
240 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
241 #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
242 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
243 KVM_CAP_IRQ_INJECT_STATUS);
244 if (r > 0) {
245 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
247 #endif
248 kvm_state->irqchip_in_kernel = 1;
249 } else
250 fprintf(stderr, "Create kernel PIC irqchip failed\n");
253 #endif
256 #ifdef KVM_CAP_IRQCHIP
258 int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
260 struct kvm_irq_level event;
261 int r;
263 if (!kvm_state->irqchip_in_kernel) {
264 return 0;
266 event.level = level;
267 event.irq = irq;
268 r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event);
269 if (r < 0) {
270 perror("kvm_set_irq_level");
273 if (status) {
274 #ifdef KVM_CAP_IRQ_INJECT_STATUS
275 *status =
276 (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
277 #else
278 *status = 1;
279 #endif
282 return 1;
285 int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
287 int r;
289 if (!kvm_state->irqchip_in_kernel) {
290 return 0;
292 r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
293 if (r < 0) {
294 perror("kvm_get_irqchip\n");
296 return r;
299 int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
301 int r;
303 if (!kvm_state->irqchip_in_kernel) {
304 return 0;
306 r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
307 if (r < 0) {
308 perror("kvm_set_irqchip\n");
310 return r;
313 #endif
315 static int handle_mmio(CPUState *env)
317 unsigned long addr = env->kvm_run->mmio.phys_addr;
318 struct kvm_run *kvm_run = env->kvm_run;
319 void *data = kvm_run->mmio.data;
321 /* hack: Red Hat 7.1 generates these weird accesses. */
322 if ((addr > 0xa0000 - 4 && addr <= 0xa0000) && kvm_run->mmio.len == 3) {
323 return 0;
326 cpu_physical_memory_rw(addr, data, kvm_run->mmio.len, kvm_run->mmio.is_write);
327 return 0;
330 int handle_io_window(kvm_context_t kvm)
332 return 1;
335 int handle_shutdown(kvm_context_t kvm, CPUState *env)
337 /* stop the current vcpu from going back to guest mode */
338 env->stopped = 1;
340 qemu_system_reset_request();
341 return 1;
344 static inline void push_nmi(kvm_context_t kvm)
346 #ifdef KVM_CAP_USER_NMI
347 kvm_arch_push_nmi();
348 #endif /* KVM_CAP_USER_NMI */
351 void post_kvm_run(kvm_context_t kvm, CPUState *env)
353 pthread_mutex_lock(&qemu_mutex);
354 kvm_arch_post_run(env, env->kvm_run);
355 cpu_single_env = env;
358 int pre_kvm_run(kvm_context_t kvm, CPUState *env)
360 kvm_arch_pre_run(env, env->kvm_run);
362 pthread_mutex_unlock(&qemu_mutex);
363 return 0;
366 int kvm_is_ready_for_interrupt_injection(CPUState *env)
368 return env->kvm_run->ready_for_interrupt_injection;
371 int kvm_run(CPUState *env)
373 int r;
374 kvm_context_t kvm = &env->kvm_state->kvm_context;
375 struct kvm_run *run = env->kvm_run;
376 int fd = env->kvm_fd;
378 again:
379 if (env->kvm_vcpu_dirty) {
380 kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
381 env->kvm_vcpu_dirty = 0;
383 push_nmi(kvm);
384 #if !defined(__s390__)
385 if (!kvm_state->irqchip_in_kernel) {
386 run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
388 #endif
390 r = pre_kvm_run(kvm, env);
391 if (r) {
392 return r;
394 if (env->exit_request) {
395 env->exit_request = 0;
396 pthread_kill(env->thread->thread, SIG_IPI);
398 r = ioctl(fd, KVM_RUN, 0);
400 if (r == -1 && errno != EINTR && errno != EAGAIN) {
401 r = -errno;
402 post_kvm_run(kvm, env);
403 fprintf(stderr, "kvm_run: %s\n", strerror(-r));
404 return r;
407 post_kvm_run(kvm, env);
409 kvm_flush_coalesced_mmio_buffer();
411 #if !defined(__s390__)
412 if (r == -1) {
413 r = handle_io_window(kvm);
414 goto more;
416 #endif
417 if (1) {
418 switch (run->exit_reason) {
419 case KVM_EXIT_UNKNOWN:
420 r = handle_unhandled(run->hw.hardware_exit_reason);
421 break;
422 case KVM_EXIT_FAIL_ENTRY:
423 r = handle_failed_vmentry(run->fail_entry.hardware_entry_failure_reason);
424 break;
425 case KVM_EXIT_EXCEPTION:
426 fprintf(stderr, "exception %d (%x)\n", run->ex.exception,
427 run->ex.error_code);
428 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
429 abort();
430 break;
431 case KVM_EXIT_IO:
432 kvm_handle_io(run->io.port,
433 (uint8_t *)run + run->io.data_offset,
434 run->io.direction,
435 run->io.size,
436 run->io.count);
437 r = 0;
438 break;
439 case KVM_EXIT_MMIO:
440 r = handle_mmio(env);
441 break;
442 case KVM_EXIT_HLT:
443 r = kvm_arch_halt(env);
444 break;
445 case KVM_EXIT_IRQ_WINDOW_OPEN:
446 break;
447 case KVM_EXIT_SHUTDOWN:
448 r = handle_shutdown(kvm, env);
449 break;
450 #if defined(__s390__)
451 case KVM_EXIT_S390_SIEIC:
452 r = kvm_s390_handle_intercept(kvm, env, run);
453 break;
454 case KVM_EXIT_S390_RESET:
455 r = kvm_s390_handle_reset(kvm, env, run);
456 break;
457 #endif
458 case KVM_EXIT_INTERNAL_ERROR:
459 r = kvm_handle_internal_error(env, run);
460 break;
461 default:
462 r = kvm_arch_run(env);
463 if (r < 0) {
464 fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason);
465 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
466 abort();
468 if (r > 0) {
469 return r;
471 break;
474 more:
475 if (!r) {
476 goto again;
478 return r;
481 int kvm_inject_irq(CPUState *env, unsigned irq)
483 struct kvm_interrupt intr;
485 intr.irq = irq;
486 return kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
489 int kvm_inject_nmi(CPUState *env)
491 #ifdef KVM_CAP_USER_NMI
492 return kvm_vcpu_ioctl(env, KVM_NMI);
493 #else
494 return -ENOSYS;
495 #endif
498 int kvm_init_coalesced_mmio(kvm_context_t kvm)
500 int r = 0;
501 kvm_state->coalesced_mmio = 0;
502 #ifdef KVM_CAP_COALESCED_MMIO
503 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
504 if (r > 0) {
505 kvm_state->coalesced_mmio = r;
506 return 0;
508 #endif
509 return r;
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
/* Hand a host PCI device to the guest. */
int kvm_assign_pci_device(kvm_context_t kvm,
                          struct kvm_assigned_pci_dev *assigned_dev)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
}

/* Legacy (pre-ASSIGN_DEV_IRQ) irq-assignment ioctl. */
static int kvm_old_assign_irq(kvm_context_t kvm,
                              struct kvm_assigned_irq *assigned_irq)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
}

#ifdef KVM_CAP_ASSIGN_DEV_IRQ
/* Assign an irq to an assigned device, preferring the modern ioctl and
 * falling back to the legacy one on old kernels. */
int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
    if (ret > 0) {
        return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq);
    }

    return kvm_old_assign_irq(kvm, assigned_irq);
}

/* Remove an irq assignment from an assigned device. */
int kvm_deassign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
{
    return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
}
#else
/* No KVM_CAP_ASSIGN_DEV_IRQ: only the legacy path exists. */
int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
{
    return kvm_old_assign_irq(kvm, assigned_irq);
}
#endif
#endif

#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
/* Return an assigned PCI device to the host. */
int kvm_deassign_pci_device(kvm_context_t kvm,
                            struct kvm_assigned_pci_dev *assigned_dev)
{
    return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
}
#endif
558 int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
560 #ifdef KVM_CAP_REINJECT_CONTROL
561 int r;
562 struct kvm_reinject_control control;
564 control.pit_reinject = pit_reinject;
566 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
567 if (r > 0) {
568 return kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control);
570 #endif
571 return -ENOSYS;
574 int kvm_has_gsi_routing(void)
576 int r = 0;
578 #ifdef KVM_CAP_IRQ_ROUTING
579 r = kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
580 #endif
581 return r;
584 int kvm_get_gsi_count(kvm_context_t kvm)
586 #ifdef KVM_CAP_IRQ_ROUTING
587 return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
588 #else
589 return -EINVAL;
590 #endif
593 int kvm_clear_gsi_routes(void)
595 #ifdef KVM_CAP_IRQ_ROUTING
596 kvm_context_t kvm = kvm_context;
598 kvm->irq_routes->nr = 0;
599 return 0;
600 #else
601 return -EINVAL;
602 #endif
/* Append @entry to the cached routing table, growing the table (doubling,
 * minimum 64 entries) as needed, and mark its GSI as used.
 * Returns 0, -ENOMEM on allocation failure, -ENOSYS without routing. */
int kvm_add_routing_entry(struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm_context_t kvm = kvm_context;
    struct kvm_irq_routing *z;
    struct kvm_irq_routing_entry *new;
    int n, size;

    if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
        n = kvm->nr_allocated_irq_routes * 2;
        if (n < 64) {
            n = 64;
        }
        size = sizeof(struct kvm_irq_routing);
        size += n * sizeof(*new);
        z = realloc(kvm->irq_routes, size);
        if (!z) {
            return -ENOMEM;
        }
        kvm->nr_allocated_irq_routes = n;
        kvm->irq_routes = z;
    }
    n = kvm->irq_routes->nr++;
    new = &kvm->irq_routes->entries[n];
    memset(new, 0, sizeof(*new));
    new->gsi = entry->gsi;
    new->type = entry->type;
    new->flags = entry->flags;
    new->u = entry->u;

    set_gsi(kvm, entry->gsi);

    return 0;
#else
    return -ENOSYS;
#endif
}
/* Convenience wrapper: add an irqchip pin -> GSI route to the cache. */
int kvm_add_irq_route(int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_add_routing_entry(&e);
#else
    return -ENOSYS;
#endif
}
/* Remove the cached routing entry matching @entry (type, GSI and the
 * type-specific payload).  The last table entry is moved into the freed
 * slot; if no other route still uses the GSI it is cleared in the bitmap.
 * Returns 0 on success, -ESRCH when no match, -ENOSYS without routing. */
int kvm_del_routing_entry(struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm_context_t kvm = kvm_context;
    struct kvm_irq_routing_entry *e, *p;
    int i, gsi, found = 0;

    gsi = entry->gsi;

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        e = &kvm->irq_routes->entries[i];
        if (e->type == entry->type && e->gsi == gsi) {
            switch (e->type) {
            case KVM_IRQ_ROUTING_IRQCHIP:{
                    if (e->u.irqchip.irqchip ==
                        entry->u.irqchip.irqchip
                        && e->u.irqchip.pin == entry->u.irqchip.pin) {
                        /* swap-remove with the last entry */
                        p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
                        *e = *p;
                        found = 1;
                    }
                    break;
                }
            case KVM_IRQ_ROUTING_MSI:{
                    if (e->u.msi.address_lo ==
                        entry->u.msi.address_lo
                        && e->u.msi.address_hi ==
                        entry->u.msi.address_hi
                        && e->u.msi.data == entry->u.msi.data) {
                        p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
                        *e = *p;
                        found = 1;
                    }
                    break;
                }
            default:
                break;
            }
            if (found) {
                /* If there are no other users of this GSI
                 * mark it available in the bitmap */
                for (i = 0; i < kvm->irq_routes->nr; i++) {
                    e = &kvm->irq_routes->entries[i];
                    if (e->gsi == gsi)
                        break;
                }
                if (i == kvm->irq_routes->nr) {
                    clear_gsi(kvm, gsi);
                }

                return 0;
            }
        }
    }

    return -ESRCH;
#else
    return -ENOSYS;
#endif
}
/* Replace the payload of the cached route matching @entry with
 * @newentry's payload.  GSI and type must be unchanged (-EINVAL
 * otherwise); -ESRCH when no matching route exists. */
int kvm_update_routing_entry(struct kvm_irq_routing_entry *entry,
                             struct kvm_irq_routing_entry *newentry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm_context_t kvm = kvm_context;
    struct kvm_irq_routing_entry *e;
    int i;

    if (entry->gsi != newentry->gsi || entry->type != newentry->type) {
        return -EINVAL;
    }

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        e = &kvm->irq_routes->entries[i];
        if (e->type != entry->type || e->gsi != entry->gsi) {
            continue;
        }
        switch (e->type) {
        case KVM_IRQ_ROUTING_IRQCHIP:
            if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
                e->u.irqchip.pin == entry->u.irqchip.pin) {
                memcpy(&e->u.irqchip, &newentry->u.irqchip,
                       sizeof e->u.irqchip);
                return 0;
            }
            break;
        case KVM_IRQ_ROUTING_MSI:
            if (e->u.msi.address_lo == entry->u.msi.address_lo &&
                e->u.msi.address_hi == entry->u.msi.address_hi &&
                e->u.msi.data == entry->u.msi.data) {
                memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
                return 0;
            }
            break;
        default:
            break;
        }
    }

    return -ESRCH;
#else
    return -ENOSYS;
#endif
}
/* Convenience wrapper: remove an irqchip pin -> GSI route from the cache. */
int kvm_del_irq_route(int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_del_routing_entry(&e);
#else
    return -ENOSYS;
#endif
}

/* Push the cached routing table to the kernel in one ioctl. */
int kvm_commit_irq_routes(void)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm_context_t kvm = kvm_context;

    kvm->irq_routes->flags = 0;
    return kvm_vm_ioctl(kvm_state, KVM_SET_GSI_ROUTING, kvm->irq_routes);
#else
    return -ENOSYS;
#endif
}
791 int kvm_get_irq_route_gsi(void)
793 kvm_context_t kvm = kvm_context;
794 int i, bit;
795 uint32_t *buf = kvm->used_gsi_bitmap;
797 /* Return the lowest unused GSI in the bitmap */
798 for (i = 0; i < kvm->max_gsi / 32; i++) {
799 bit = ffs(~buf[i]);
800 if (!bit) {
801 continue;
804 return bit - 1 + i * 32;
807 return -ENOSPC;
810 static void kvm_msi_routing_entry(struct kvm_irq_routing_entry *e,
811 KVMMsiMessage *msg)
814 e->gsi = msg->gsi;
815 e->type = KVM_IRQ_ROUTING_MSI;
816 e->flags = 0;
817 e->u.msi.address_lo = msg->addr_lo;
818 e->u.msi.address_hi = msg->addr_hi;
819 e->u.msi.data = msg->data;
822 int kvm_msi_message_add(KVMMsiMessage *msg)
824 struct kvm_irq_routing_entry e;
825 int ret;
827 ret = kvm_get_irq_route_gsi();
828 if (ret < 0) {
829 return ret;
831 msg->gsi = ret;
833 kvm_msi_routing_entry(&e, msg);
834 return kvm_add_routing_entry(&e);
837 int kvm_msi_message_del(KVMMsiMessage *msg)
839 struct kvm_irq_routing_entry e;
841 kvm_msi_routing_entry(&e, msg);
842 return kvm_del_routing_entry(&e);
845 int kvm_msi_message_update(KVMMsiMessage *old, KVMMsiMessage *new)
847 struct kvm_irq_routing_entry e1, e2;
848 int ret;
850 new->gsi = old->gsi;
851 if (memcmp(old, new, sizeof(KVMMsiMessage)) == 0) {
852 return 0;
855 kvm_msi_routing_entry(&e1, old);
856 kvm_msi_routing_entry(&e2, new);
858 ret = kvm_update_routing_entry(&e1, &e2);
859 if (ret < 0) {
860 return ret;
863 return 1;
#ifdef KVM_CAP_DEVICE_MSIX
/* Tell the kernel how many MSI-X entries an assigned device uses. */
int kvm_assign_set_msix_nr(kvm_context_t kvm,
                           struct kvm_assigned_msix_nr *msix_nr)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
}

/* Program one MSI-X entry of an assigned device. */
int kvm_assign_set_msix_entry(kvm_context_t kvm,
                              struct kvm_assigned_msix_entry *entry)
{
    return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
}
#endif
/* Kernel thread id (TID) of the calling thread; glibc has no wrapper
 * for gettid on older systems, so use syscall(2) directly. */
unsigned long kvm_get_thread_id(void)
{
    return syscall(SYS_gettid);
}
886 static void kvm_cond_wait(pthread_cond_t *cond)
888 CPUState *env = cpu_single_env;
890 pthread_cond_wait(cond, &qemu_mutex);
891 cpu_single_env = env;
894 static void sig_ipi_handler(int n)
898 static void sigbus_reraise(void)
900 sigset_t set;
901 struct sigaction action;
903 memset(&action, 0, sizeof(action));
904 action.sa_handler = SIG_DFL;
905 if (!sigaction(SIGBUS, &action, NULL)) {
906 raise(SIGBUS);
907 sigemptyset(&set);
908 sigaddset(&set, SIGBUS);
909 sigprocmask(SIG_UNBLOCK, &set, NULL);
911 perror("Failed to re-raise SIGBUS!\n");
912 abort();
915 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
916 void *ctx)
918 if (kvm_on_sigbus(siginfo->ssi_code, (void *)(intptr_t)siginfo->ssi_addr))
919 sigbus_reraise();
922 void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
924 struct qemu_work_item wi;
926 if (env == current_env) {
927 func(data);
928 return;
931 wi.func = func;
932 wi.data = data;
933 if (!env->kvm_cpu_state.queued_work_first) {
934 env->kvm_cpu_state.queued_work_first = &wi;
935 } else {
936 env->kvm_cpu_state.queued_work_last->next = &wi;
938 env->kvm_cpu_state.queued_work_last = &wi;
939 wi.next = NULL;
940 wi.done = false;
942 pthread_kill(env->thread->thread, SIG_IPI);
943 while (!wi.done) {
944 kvm_cond_wait(&qemu_work_cond);
948 static void inject_interrupt(void *data)
950 cpu_interrupt(current_env, (long) data);
953 void kvm_inject_interrupt(CPUState *env, int mask)
955 on_vcpu(env, inject_interrupt, (void *) (long) mask);
958 void kvm_update_interrupt_request(CPUState *env)
960 int signal = 0;
962 if (env) {
963 if (!current_env || !current_env->created) {
964 signal = 1;
967 * Testing for created here is really redundant
969 if (current_env && current_env->created &&
970 env != current_env && !env->kvm_cpu_state.signalled) {
971 signal = 1;
974 if (signal) {
975 env->kvm_cpu_state.signalled = 1;
976 if (env->thread) {
977 pthread_kill(env->thread->thread, SIG_IPI);
983 int kvm_cpu_exec(CPUState *env)
985 int r;
987 r = kvm_run(env);
988 if (r < 0) {
989 printf("kvm_run returned %d\n", r);
990 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
991 vm_stop(VMSTOP_PANIC);
994 return 0;
997 int kvm_cpu_is_stopped(CPUState *env)
999 return !vm_running || env->stopped;
1002 static void flush_queued_work(CPUState *env)
1004 struct qemu_work_item *wi;
1006 if (!env->kvm_cpu_state.queued_work_first) {
1007 return;
1010 while ((wi = env->kvm_cpu_state.queued_work_first)) {
1011 env->kvm_cpu_state.queued_work_first = wi->next;
1012 wi->func(wi->data);
1013 wi->done = true;
1015 env->kvm_cpu_state.queued_work_last = NULL;
1016 pthread_cond_broadcast(&qemu_work_cond);
1019 static void kvm_main_loop_wait(CPUState *env, int timeout)
1021 struct timespec ts;
1022 int r, e;
1023 siginfo_t siginfo;
1024 sigset_t waitset;
1025 sigset_t chkset;
1027 ts.tv_sec = timeout / 1000;
1028 ts.tv_nsec = (timeout % 1000) * 1000000;
1029 sigemptyset(&waitset);
1030 sigaddset(&waitset, SIG_IPI);
1031 sigaddset(&waitset, SIGBUS);
1033 do {
1034 pthread_mutex_unlock(&qemu_mutex);
1036 r = sigtimedwait(&waitset, &siginfo, &ts);
1037 e = errno;
1039 pthread_mutex_lock(&qemu_mutex);
1041 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
1042 printf("sigtimedwait: %s\n", strerror(e));
1043 exit(1);
1046 switch (r) {
1047 case SIGBUS:
1048 if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr))
1049 sigbus_reraise();
1050 break;
1051 default:
1052 break;
1055 r = sigpending(&chkset);
1056 if (r == -1) {
1057 printf("sigpending: %s\n", strerror(e));
1058 exit(1);
1060 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1062 cpu_single_env = env;
1063 flush_queued_work(env);
1065 if (env->stop) {
1066 env->stop = 0;
1067 env->stopped = 1;
1068 pthread_cond_signal(&qemu_pause_cond);
1071 env->kvm_cpu_state.signalled = 0;
1074 static int all_threads_paused(void)
1076 CPUState *penv = first_cpu;
1078 while (penv) {
1079 if (penv->stop) {
1080 return 0;
1082 penv = (CPUState *) penv->next_cpu;
1085 return 1;
1088 static void pause_all_threads(void)
1090 CPUState *penv = first_cpu;
1092 while (penv) {
1093 if (penv != cpu_single_env) {
1094 penv->stop = 1;
1095 pthread_kill(penv->thread->thread, SIG_IPI);
1096 } else {
1097 penv->stop = 0;
1098 penv->stopped = 1;
1099 cpu_exit(penv);
1101 penv = (CPUState *) penv->next_cpu;
1104 while (!all_threads_paused()) {
1105 kvm_cond_wait(&qemu_pause_cond);
1109 static void resume_all_threads(void)
1111 CPUState *penv = first_cpu;
1113 assert(!cpu_single_env);
1115 while (penv) {
1116 penv->stop = 0;
1117 penv->stopped = 0;
1118 pthread_kill(penv->thread->thread, SIG_IPI);
1119 penv = (CPUState *) penv->next_cpu;
1123 static void kvm_vm_state_change_handler(void *context, int running, int reason)
1125 if (running) {
1126 resume_all_threads();
1127 } else {
1128 pause_all_threads();
1132 static void setup_kernel_sigmask(CPUState *env)
1134 sigset_t set;
1136 sigemptyset(&set);
1137 sigaddset(&set, SIGUSR2);
1138 sigaddset(&set, SIGIO);
1139 sigaddset(&set, SIGALRM);
1140 sigprocmask(SIG_BLOCK, &set, NULL);
1142 sigprocmask(SIG_BLOCK, NULL, &set);
1143 sigdelset(&set, SIG_IPI);
1144 sigdelset(&set, SIGBUS);
1146 kvm_set_signal_mask(env, &set);
1149 static void qemu_kvm_system_reset(void)
1151 pause_all_threads();
1153 cpu_synchronize_all_states();
1154 qemu_system_reset();
1156 resume_all_threads();
1159 static void process_irqchip_events(CPUState *env)
1161 kvm_arch_process_irqchip_events(env);
1162 if (kvm_arch_has_work(env))
1163 env->halted = 0;
1166 static int kvm_main_loop_cpu(CPUState *env)
1168 while (1) {
1169 int run_cpu = !kvm_cpu_is_stopped(env);
1170 if (run_cpu) {
1171 kvm_arch_process_async_events(env);
1172 if (!kvm_irqchip_in_kernel()) {
1173 process_irqchip_events(env);
1174 run_cpu = !env->halted;
1177 if (run_cpu) {
1178 kvm_cpu_exec(env);
1179 kvm_main_loop_wait(env, 0);
1180 } else {
1181 kvm_main_loop_wait(env, 1000);
1184 pthread_mutex_unlock(&qemu_mutex);
1185 return 0;
1188 static void *ap_main_loop(void *_env)
1190 CPUState *env = _env;
1191 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1192 struct ioperm_data *data = NULL;
1193 #endif
1195 current_env = env;
1196 env->thread_id = kvm_get_thread_id();
1198 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1199 /* do ioperm for io ports of assigned devices */
1200 QLIST_FOREACH(data, &ioperm_head, entries)
1201 on_vcpu(env, kvm_arch_do_ioperm, data);
1202 #endif
1204 pthread_mutex_lock(&qemu_mutex);
1205 cpu_single_env = env;
1207 if (kvm_create_vcpu(env) < 0) {
1208 abort();
1210 setup_kernel_sigmask(env);
1212 /* signal VCPU creation */
1213 current_env->created = 1;
1214 pthread_cond_signal(&qemu_vcpu_cond);
1216 /* and wait for machine initialization */
1217 while (!qemu_system_ready) {
1218 kvm_cond_wait(&qemu_system_cond);
1221 /* re-initialize cpu_single_env after re-acquiring qemu_mutex */
1222 cpu_single_env = env;
1224 kvm_main_loop_cpu(env);
1225 return NULL;
1228 int kvm_init_vcpu(CPUState *env)
1230 env->thread = qemu_mallocz(sizeof(QemuThread));
1231 qemu_thread_create(env->thread, ap_main_loop, env);
1233 while (env->created == 0) {
1234 kvm_cond_wait(&qemu_vcpu_cond);
1237 return 0;
1240 #ifdef TARGET_I386
1241 void kvm_hpet_disable_kpit(void)
1243 struct kvm_pit_state2 ps2;
1245 kvm_get_pit2(kvm_context, &ps2);
1246 ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
1247 kvm_set_pit2(kvm_context, &ps2);
1250 void kvm_hpet_enable_kpit(void)
1252 struct kvm_pit_state2 ps2;
1254 kvm_get_pit2(kvm_context, &ps2);
1255 ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY;
1256 kvm_set_pit2(kvm_context, &ps2);
1258 #endif
1260 int kvm_init_ap(void)
1262 struct sigaction action;
1264 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
1266 signal(SIG_IPI, sig_ipi_handler);
1268 memset(&action, 0, sizeof(action));
1269 action.sa_flags = SA_SIGINFO;
1270 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
1271 sigaction(SIGBUS, &action, NULL);
1272 prctl(PR_MCE_KILL, 1, 1, 0, 0);
1273 return 0;
1276 /* If we have signalfd, we mask out the signals we want to handle and then
1277 * use signalfd to listen for them. We rely on whatever the current signal
1278 * handler is to dispatch the signals when we receive them.
1281 static void sigfd_handler(void *opaque)
1283 int fd = (unsigned long) opaque;
1284 struct qemu_signalfd_siginfo info;
1285 struct sigaction action;
1286 ssize_t len;
1288 while (1) {
1289 do {
1290 len = read(fd, &info, sizeof(info));
1291 } while (len == -1 && errno == EINTR);
1293 if (len == -1 && errno == EAGAIN) {
1294 break;
1297 if (len != sizeof(info)) {
1298 printf("read from sigfd returned %zd: %m\n", len);
1299 return;
1302 sigaction(info.ssi_signo, NULL, &action);
1303 if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
1304 action.sa_sigaction(info.ssi_signo,
1305 (siginfo_t *)&info, NULL);
1306 } else if (action.sa_handler) {
1307 action.sa_handler(info.ssi_signo);
1312 int kvm_main_loop(void)
1314 sigset_t mask;
1315 int sigfd;
1317 qemu_system_ready = 1;
1319 sigemptyset(&mask);
1320 sigaddset(&mask, SIGIO);
1321 sigaddset(&mask, SIGALRM);
1322 sigaddset(&mask, SIGBUS);
1323 sigprocmask(SIG_BLOCK, &mask, NULL);
1325 sigfd = qemu_signalfd(&mask);
1326 if (sigfd == -1) {
1327 fprintf(stderr, "failed to create signalfd\n");
1328 return -errno;
1331 fcntl(sigfd, F_SETFL, O_NONBLOCK);
1333 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
1334 (void *)(unsigned long) sigfd);
1336 pthread_cond_broadcast(&qemu_system_cond);
1338 cpu_single_env = NULL;
1340 while (1) {
1341 main_loop_wait(0);
1342 if (qemu_shutdown_requested()) {
1343 monitor_protocol_event(QEVENT_SHUTDOWN, NULL);
1344 if (qemu_no_shutdown()) {
1345 vm_stop(VMSTOP_SHUTDOWN);
1346 } else {
1347 break;
1349 } else if (qemu_powerdown_requested()) {
1350 monitor_protocol_event(QEVENT_POWERDOWN, NULL);
1351 qemu_irq_raise(qemu_system_powerdown);
1352 } else if (qemu_reset_requested()) {
1353 qemu_kvm_system_reset();
1354 } else if (kvm_debug_cpu_requested) {
1355 gdb_set_stop_cpu(kvm_debug_cpu_requested);
1356 vm_stop(VMSTOP_DEBUG);
1357 kvm_debug_cpu_requested = NULL;
1361 bdrv_close_all();
1362 pause_all_threads();
1363 pthread_mutex_unlock(&qemu_mutex);
1365 return 0;
#if !defined(TARGET_I386)
/* Non-x86 targets have no extra irq-routing setup. */
int kvm_arch_init_irq_routing(void)
{
    return 0;
}
#endif
1375 extern int no_hpet;
1377 static int kvm_create_context(void)
1379 static const char upgrade_note[] =
1380 "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
1381 "(see http://sourceforge.net/projects/kvm).\n";
1383 int r;
1385 kvm_state->pit_in_kernel = kvm_pit;
1387 #ifdef KVM_CAP_IRQ_ROUTING
1388 kvm_context->irq_routes = qemu_mallocz(sizeof(*kvm_context->irq_routes));
1389 kvm_context->nr_allocated_irq_routes = 0;
1390 #endif
1392 kvm_state->vmfd = kvm_ioctl(kvm_state, KVM_CREATE_VM, 0);
1393 if (kvm_state->vmfd < 0) {
1394 fprintf(stderr, "kvm_create_vm: %m\n");
1395 kvm_finalize(kvm_state);
1396 return -1;
1399 r = kvm_arch_create(kvm_context);
1400 if (r < 0) {
1401 kvm_finalize(kvm_state);
1402 return r;
1405 kvm_create_irqchip(kvm_context);
1407 /* There was a nasty bug in < kvm-80 that prevents memory slots from being
1408 * destroyed properly. Since we rely on this capability, refuse to work
1409 * with any kernel without this capability. */
1410 if (!kvm_check_extension(kvm_state, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
1411 fprintf(stderr,
1412 "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
1413 upgrade_note);
1414 return -EINVAL;
1417 r = kvm_arch_init_irq_routing();
1418 if (r < 0) {
1419 return r;
1422 kvm_state->vcpu_events = 0;
1423 #ifdef KVM_CAP_VCPU_EVENTS
1424 kvm_state->vcpu_events = kvm_check_extension(kvm_state, KVM_CAP_VCPU_EVENTS);
1425 #endif
1427 kvm_state->debugregs = 0;
1428 #ifdef KVM_CAP_DEBUGREGS
1429 kvm_state->debugregs = kvm_check_extension(kvm_state, KVM_CAP_DEBUGREGS);
1430 #endif
1432 kvm_state->xsave = 0;
1433 #ifdef KVM_CAP_XSAVE
1434 kvm_state->xsave = kvm_check_extension(kvm_state, KVM_CAP_XSAVE);
1435 #endif
1437 kvm_state->xcrs = 0;
1438 #ifdef KVM_CAP_XCRS
1439 kvm_state->xcrs = kvm_check_extension(kvm_state, KVM_CAP_XCRS);
1440 #endif
1442 kvm_state->many_ioeventfds = kvm_check_many_ioeventfds();
1444 kvm_init_ap();
1445 if (kvm_irqchip) {
1446 if (!qemu_kvm_has_gsi_routing()) {
1447 irq0override = 0;
1448 #ifdef TARGET_I386
1449 /* if kernel can't do irq routing, interrupt source
1450 * override 0->2 can not be set up as required by hpet,
1451 * so disable hpet.
1453 no_hpet = 1;
1454 } else if (!qemu_kvm_has_pit_state2()) {
1455 no_hpet = 1;
1457 #else
1459 #endif
1462 return 0;
1465 #ifdef KVM_CAP_IRQCHIP
1467 int kvm_set_irq(int irq, int level, int *status)
1469 return kvm_set_irq_level(kvm_context, irq, level, status);
1472 #endif
1474 static void kvm_mutex_unlock(void)
1476 assert(!cpu_single_env);
1477 pthread_mutex_unlock(&qemu_mutex);
1480 static void kvm_mutex_lock(void)
1482 pthread_mutex_lock(&qemu_mutex);
1483 cpu_single_env = NULL;
1486 void qemu_mutex_unlock_iothread(void)
1488 if (kvm_enabled()) {
1489 kvm_mutex_unlock();
1493 void qemu_mutex_lock_iothread(void)
1495 if (kvm_enabled()) {
1496 kvm_mutex_lock();
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
/* Register an ioperm range so new vcpu threads inherit it. */
void kvm_add_ioperm_data(struct ioperm_data *data)
{
    QLIST_INSERT_HEAD(&ioperm_head, data, entries);
}

/* Remove (and free) every registered ioperm range matching
 * @start_port/@num. */
void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
{
    struct ioperm_data *data;

    data = QLIST_FIRST(&ioperm_head);
    while (data) {
        struct ioperm_data *next = QLIST_NEXT(data, entries);

        if (data->start_port == start_port && data->num == num) {
            QLIST_REMOVE(data, entries);
            qemu_free(data);
        }

        data = next;
    }
}

/* Apply an ioperm range on @env's vcpu thread (no-op before system init). */
void kvm_ioperm(CPUState *env, void *data)
{
    if (kvm_enabled() && qemu_system_ready) {
        on_vcpu(env, kvm_arch_do_ioperm, data);
    }
}

#endif
1532 int kvm_set_boot_cpu_id(uint32_t id)
1534 return kvm_set_boot_vcpu_id(kvm_context, id);