kvm: external module: adjust for new host kernels install location
[qemu-kvm/fedora.git] / qemu-kvm.c
blob8b5d2dc21c46686fe9f4d857afbc208037c21434
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
/* Global KVM switches; each one is enabled (1) by default. */
int kvm_allowed = 1;
/* When zero, kvm_qemu_create_context() disables in-kernel irqchip creation. */
int kvm_irqchip = 1;
/* When zero, kvm_qemu_create_context() disables in-kernel PIT creation. */
int kvm_pit = 1;
15 #include <string.h>
16 #include "hw/hw.h"
17 #include "sysemu.h"
19 #include "qemu-kvm.h"
20 #include <libkvm.h>
21 #include <pthread.h>
22 #include <sys/utsname.h>
23 #include <sys/syscall.h>
25 extern void perror(const char *s);
27 kvm_context_t kvm_context;
29 extern int smp_cpus;
31 static int qemu_kvm_reset_requested;
33 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
34 pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
35 __thread struct vcpu_info *vcpu;
37 struct qemu_kvm_signal_table {
38 sigset_t sigset;
39 sigset_t negsigset;
42 static struct qemu_kvm_signal_table io_signal_table;
43 static struct qemu_kvm_signal_table vcpu_signal_table;
45 #define SIG_IPI (SIGRTMIN+4)
47 struct vcpu_info {
48 CPUState *env;
49 int sipi_needed;
50 int init;
51 pthread_t thread;
52 int signalled;
53 int stop;
54 int stopped;
55 } vcpu_info[256];
57 pthread_t io_thread;
/* Return the calling thread's id as reported by the gettid system call. */
static inline unsigned long kvm_get_thread_id(void)
{
    long tid = syscall(SYS_gettid);

    return tid;
}
64 CPUState *qemu_kvm_cpu_env(int index)
66 return vcpu_info[index].env;
/*
 * Handler installed for SIG_IPI by kvm_init_ap().  It deliberately does
 * nothing: the signal is only used to wake up a vcpu thread.
 */
static void sig_ipi_handler(int n)
{
    (void)n;
}
73 void kvm_update_interrupt_request(CPUState *env)
75 int signal = 0;
77 if (env) {
78 if (!vcpu)
79 signal = 1;
80 if (vcpu && env != vcpu->env && !vcpu_info[env->cpu_index].signalled)
81 signal = 1;
83 if (signal) {
84 vcpu_info[env->cpu_index].signalled = 1;
85 if (vcpu_info[env->cpu_index].thread)
86 pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);
91 void kvm_update_after_sipi(CPUState *env)
93 vcpu_info[env->cpu_index].sipi_needed = 1;
94 kvm_update_interrupt_request(env);
97 void kvm_apic_init(CPUState *env)
99 if (env->cpu_index != 0)
100 vcpu_info[env->cpu_index].init = 1;
101 kvm_update_interrupt_request(env);
104 #include <signal.h>
/* kvm_callbacks.try_push_interrupts: forward to the architecture hook. */
static int try_push_interrupts(void *opaque)
{
    int pushed = kvm_arch_try_push_interrupts(opaque);

    return pushed;
}
111 static void post_kvm_run(void *opaque, int vcpu)
114 pthread_mutex_lock(&qemu_mutex);
115 kvm_arch_post_kvm_run(opaque, vcpu);
118 static int pre_kvm_run(void *opaque, int vcpu)
120 CPUState *env = qemu_kvm_cpu_env(vcpu);
122 kvm_arch_pre_kvm_run(opaque, vcpu);
124 if (env->interrupt_request & CPU_INTERRUPT_EXIT)
125 return 1;
126 pthread_mutex_unlock(&qemu_mutex);
127 return 0;
130 void kvm_load_registers(CPUState *env)
132 if (kvm_enabled())
133 kvm_arch_load_regs(env);
136 void kvm_save_registers(CPUState *env)
138 if (kvm_enabled())
139 kvm_arch_save_regs(env);
142 int kvm_cpu_exec(CPUState *env)
144 int r;
146 r = kvm_run(kvm_context, env->cpu_index);
147 if (r < 0) {
148 printf("kvm_run returned %d\n", r);
149 exit(1);
152 return 0;
155 extern int vm_running;
157 static int has_work(CPUState *env)
159 if (!vm_running || (env && vcpu_info[env->cpu_index].stopped))
160 return 0;
161 if (!(env->hflags & HF_HALTED_MASK))
162 return 1;
163 return kvm_arch_has_work(env);
166 static int kvm_process_signal(int si_signo)
168 struct sigaction sa;
170 switch (si_signo) {
171 case SIGUSR2:
172 pthread_cond_signal(&qemu_aio_cond);
173 break;
174 case SIGALRM:
175 case SIGIO:
176 sigaction(si_signo, NULL, &sa);
177 sa.sa_handler(si_signo);
178 break;
181 return 1;
184 static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
185 int timeout)
187 struct timespec ts;
188 int r, e, ret = 0;
189 siginfo_t siginfo;
191 ts.tv_sec = timeout / 1000;
192 ts.tv_nsec = (timeout % 1000) * 1000000;
193 r = sigtimedwait(&waitset->sigset, &siginfo, &ts);
194 if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
195 return 0;
196 e = errno;
197 pthread_mutex_lock(&qemu_mutex);
198 if (env && vcpu)
199 cpu_single_env = vcpu->env;
200 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
201 printf("sigtimedwait: %s\n", strerror(e));
202 exit(1);
204 if (r != -1)
205 ret = kvm_process_signal(siginfo.si_signo);
207 if (env && vcpu_info[env->cpu_index].stop) {
208 vcpu_info[env->cpu_index].stop = 0;
209 vcpu_info[env->cpu_index].stopped = 1;
210 pthread_kill(io_thread, SIGUSR1);
212 pthread_mutex_unlock(&qemu_mutex);
214 return ret;
218 static void kvm_eat_signals(CPUState *env, int timeout)
220 int r = 0;
221 struct qemu_kvm_signal_table *waitset = &vcpu_signal_table;
223 while (kvm_eat_signal(waitset, env, 0))
224 r = 1;
225 if (!r && timeout) {
226 r = kvm_eat_signal(waitset, env, timeout);
227 if (r)
228 while (kvm_eat_signal(waitset, env, 0))
233 static void kvm_main_loop_wait(CPUState *env, int timeout)
235 pthread_mutex_unlock(&qemu_mutex);
236 kvm_eat_signals(env, timeout);
237 pthread_mutex_lock(&qemu_mutex);
238 cpu_single_env = env;
239 vcpu_info[env->cpu_index].signalled = 0;
242 static int all_threads_paused(void)
244 int i;
246 for (i = 0; i < smp_cpus; ++i)
247 if (vcpu_info[i].stopped)
248 return 0;
249 return 1;
252 static void pause_all_threads(void)
254 int i;
256 for (i = 0; i < smp_cpus; ++i) {
257 vcpu_info[i].stop = 1;
258 pthread_kill(vcpu_info[i].thread, SIG_IPI);
260 while (!all_threads_paused())
261 kvm_eat_signal(&io_signal_table, NULL, 1000);
264 static void resume_all_threads(void)
266 int i;
268 for (i = 0; i < smp_cpus; ++i) {
269 vcpu_info[i].stop = 0;
270 vcpu_info[i].stopped = 0;
271 pthread_kill(vcpu_info[i].thread, SIG_IPI);
/*
 * VM run-state callback registered in kvm_init_ap(): resume the vcpu
 * threads when the VM starts running and pause them when it stops.
 */
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (!running) {
        pause_all_threads();
        return;
    }
    resume_all_threads();
}
283 static void update_regs_for_sipi(CPUState *env)
285 kvm_arch_update_regs_for_sipi(env);
286 vcpu_info[env->cpu_index].sipi_needed = 0;
287 vcpu_info[env->cpu_index].init = 0;
290 static void update_regs_for_init(CPUState *env)
292 cpu_reset(env);
293 kvm_arch_load_regs(env);
296 static void setup_kernel_sigmask(CPUState *env)
298 sigset_t set;
300 sigprocmask(SIG_BLOCK, NULL, &set);
301 sigdelset(&set, SIG_IPI);
303 kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
306 static int kvm_main_loop_cpu(CPUState *env)
308 struct vcpu_info *info = &vcpu_info[env->cpu_index];
310 setup_kernel_sigmask(env);
311 pthread_mutex_lock(&qemu_mutex);
313 kvm_qemu_init_env(env);
314 env->ready_for_interrupt_injection = 1;
315 #ifdef TARGET_I386
316 kvm_tpr_vcpu_start(env);
317 #endif
319 cpu_single_env = env;
320 while (1) {
321 while (!has_work(env))
322 kvm_main_loop_wait(env, 10);
323 if (env->interrupt_request & CPU_INTERRUPT_HARD)
324 env->hflags &= ~HF_HALTED_MASK;
325 if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
326 update_regs_for_sipi(env);
327 if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
328 update_regs_for_init(env);
329 if (!(env->hflags & HF_HALTED_MASK) && !info->init)
330 kvm_cpu_exec(env);
331 env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
332 kvm_main_loop_wait(env, 0);
333 if (qemu_kvm_reset_requested && env->cpu_index == 0) {
334 qemu_kvm_reset_requested = 0;
335 env->interrupt_request = 0;
336 qemu_system_reset();
337 kvm_arch_load_regs(env);
340 pthread_mutex_unlock(&qemu_mutex);
341 return 0;
344 static void *ap_main_loop(void *_env)
346 CPUState *env = _env;
347 sigset_t signals;
349 vcpu = &vcpu_info[env->cpu_index];
350 vcpu->env = env;
351 vcpu->env->thread_id = kvm_get_thread_id();
352 sigfillset(&signals);
353 sigdelset(&signals, SIG_IPI);
354 sigprocmask(SIG_BLOCK, &signals, NULL);
355 kvm_create_vcpu(kvm_context, env->cpu_index);
356 kvm_qemu_init_env(env);
357 if (kvm_irqchip_in_kernel(kvm_context))
358 env->hflags &= ~HF_HALTED_MASK;
359 kvm_main_loop_cpu(env);
360 return NULL;
363 static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
365 sigemptyset(&sigtab->sigset);
366 sigfillset(&sigtab->negsigset);
369 static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
371 sigaddset(&sigtab->sigset, signum);
372 sigdelset(&sigtab->negsigset, signum);
375 void kvm_init_new_ap(int cpu, CPUState *env)
377 pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env);
380 static void qemu_kvm_init_signal_tables(void)
382 qemu_kvm_init_signal_table(&io_signal_table);
383 qemu_kvm_init_signal_table(&vcpu_signal_table);
385 kvm_add_signal(&io_signal_table, SIGIO);
386 kvm_add_signal(&io_signal_table, SIGALRM);
387 kvm_add_signal(&io_signal_table, SIGUSR1);
388 kvm_add_signal(&io_signal_table, SIGUSR2);
390 kvm_add_signal(&vcpu_signal_table, SIG_IPI);
392 sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);
395 int kvm_init_ap(void)
397 CPUState *env = first_cpu;
398 int i;
400 #ifdef TARGET_I386
401 kvm_tpr_opt_setup();
402 #endif
403 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
404 qemu_kvm_init_signal_tables();
406 signal(SIG_IPI, sig_ipi_handler);
407 for (i = 0; i < smp_cpus; ++i) {
408 kvm_init_new_ap(i, env);
409 env = env->next_cpu;
411 return 0;
414 void qemu_kvm_notify_work(void)
416 if (io_thread)
417 pthread_kill(io_thread, SIGUSR1);
421 * The IO thread has all signals that inform machine events
422 * blocked (io_signal_table), so it won't get interrupted
423 * while processing in main_loop_wait().
426 int kvm_main_loop(void)
428 io_thread = pthread_self();
429 pthread_mutex_unlock(&qemu_mutex);
430 while (1) {
431 kvm_eat_signal(&io_signal_table, NULL, 1000);
432 pthread_mutex_lock(&qemu_mutex);
433 cpu_single_env = NULL;
434 main_loop_wait(0);
435 if (qemu_shutdown_requested())
436 break;
437 else if (qemu_powerdown_requested())
438 qemu_system_powerdown();
439 else if (qemu_reset_requested()) {
440 pthread_kill(vcpu_info[0].thread, SIG_IPI);
441 qemu_kvm_reset_requested = 1;
443 pthread_mutex_unlock(&qemu_mutex);
446 pthread_mutex_unlock(&qemu_mutex);
447 return 0;
450 static int kvm_debug(void *opaque, int vcpu)
452 CPUState *env = cpu_single_env;
454 env->exception_index = EXCP_DEBUG;
455 return 1;
/* kvm_callbacks.inb: read one byte from port 'addr' into *data. Returns 0. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    uint8_t value = cpu_inb(0, addr);

    *data = value;
    return 0;
}
/* kvm_callbacks.inw: read a 16-bit value from port 'addr' into *data. Returns 0. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    uint16_t value = cpu_inw(0, addr);

    *data = value;
    return 0;
}
/* kvm_callbacks.inl: read a 32-bit value from port 'addr' into *data. Returns 0. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    uint32_t value = cpu_inl(0, addr);

    *data = value;
    return 0;
}
#define PM_IO_BASE 0xb000

/*
 * kvm_callbacks.outb: write one byte to port 'addr'.
 *
 * Writes to port 0xb2 are intercepted instead of being forwarded:
 *   0x00  writes 0 to port 0xb3
 *   0xf0  clears bit 0 of the 16-bit register at PM_IO_BASE + 4
 *   0xf1  sets bit 0 of that register
 * Any other value written to 0xb2 is dropped.  Always returns 0.
 */
static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    unsigned pm_ctl;

    if (addr != 0xb2) {
        cpu_outb(0, addr, data);
        return 0;
    }

    switch (data) {
    case 0:
        cpu_outb(0, 0xb3, 0);
        break;
    case 0xf0:
        pm_ctl = cpu_inw(0, PM_IO_BASE + 4);
        pm_ctl &= ~1;
        cpu_outw(0, PM_IO_BASE + 4, pm_ctl);
        break;
    case 0xf1:
        pm_ctl = cpu_inw(0, PM_IO_BASE + 4);
        pm_ctl |= 1;
        cpu_outw(0, PM_IO_BASE + 4, pm_ctl);
        break;
    default:
        break;
    }
    return 0;
}
/* kvm_callbacks.outw: write a 16-bit value to port 'addr'. Always returns 0. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);

    return 0;
}
/* kvm_callbacks.outl: write a 32-bit value to port 'addr'. Always returns 0. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);

    return 0;
}
/*
 * kvm_callbacks.mmio_read: copy 'len' bytes from guest physical address
 * 'addr' into 'data'.  Always returns 0.
 */
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 0;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/*
 * kvm_callbacks.mmio_write: copy 'len' bytes from 'data' to guest physical
 * address 'addr'.  Always returns 0.
 */
static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 1;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/* kvm_callbacks.io_window: unconditionally reports 1. */
static int kvm_io_window(void *opaque)
{
    (void)opaque;

    return 1;
}
/* kvm_callbacks.halt: forward to the architecture hook and return its result. */
static int kvm_halt(void *opaque, int vcpu)
{
    int rc = kvm_arch_halt(opaque, vcpu);

    return rc;
}
/* kvm_callbacks.shutdown: request a system reset. Always returns 1. */
static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();

    return 1;
}
554 static struct kvm_callbacks qemu_kvm_ops = {
555 .debug = kvm_debug,
556 .inb = kvm_inb,
557 .inw = kvm_inw,
558 .inl = kvm_inl,
559 .outb = kvm_outb,
560 .outw = kvm_outw,
561 .outl = kvm_outl,
562 .mmio_read = kvm_mmio_read,
563 .mmio_write = kvm_mmio_write,
564 .halt = kvm_halt,
565 .shutdown = kvm_shutdown,
566 .io_window = kvm_io_window,
567 .try_push_interrupts = try_push_interrupts,
568 .post_kvm_run = post_kvm_run,
569 .pre_kvm_run = pre_kvm_run,
570 #ifdef TARGET_I386
571 .tpr_access = handle_tpr_access,
572 #endif
573 #ifdef TARGET_PPC
574 .powerpc_dcr_read = handle_powerpc_dcr_read,
575 .powerpc_dcr_write = handle_powerpc_dcr_write,
576 #endif
579 int kvm_qemu_init()
581 /* Try to initialize kvm */
582 kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
583 if (!kvm_context) {
584 return -1;
586 pthread_mutex_lock(&qemu_mutex);
588 return 0;
591 int kvm_qemu_create_context(void)
593 int r;
594 if (!kvm_irqchip) {
595 kvm_disable_irqchip_creation(kvm_context);
597 if (!kvm_pit) {
598 kvm_disable_pit_creation(kvm_context);
600 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
601 kvm_qemu_destroy();
602 return -1;
604 r = kvm_arch_qemu_create_context();
605 if(r <0)
606 kvm_qemu_destroy();
607 return 0;
610 void kvm_qemu_destroy(void)
612 kvm_finalize(kvm_context);
615 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
616 unsigned long size,
617 unsigned long phys_offset)
619 #ifdef KVM_CAP_USER_MEMORY
620 int r = 0;
622 r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
623 if (r) {
624 if (!(phys_offset & ~TARGET_PAGE_MASK)) {
625 r = kvm_is_allocated_mem(kvm_context, start_addr, size);
626 if (r)
627 return;
628 r = kvm_is_intersecting_mem(kvm_context, start_addr);
629 if (r)
630 kvm_create_mem_hole(kvm_context, start_addr, size);
631 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
632 phys_ram_base + phys_offset,
633 size, 0);
635 if (phys_offset & IO_MEM_ROM) {
636 phys_offset &= ~IO_MEM_ROM;
637 r = kvm_is_intersecting_mem(kvm_context, start_addr);
638 if (r)
639 kvm_create_mem_hole(kvm_context, start_addr, size);
640 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
641 phys_ram_base + phys_offset,
642 size, 0);
644 if (r < 0) {
645 printf("kvm_cpu_register_physical_memory: failed\n");
646 exit(1);
648 return;
650 #endif
651 if (phys_offset & IO_MEM_ROM) {
652 phys_offset &= ~IO_MEM_ROM;
653 memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
657 int kvm_qemu_check_extension(int ext)
659 return kvm_check_extension(kvm_context, ext);
662 int kvm_qemu_init_env(CPUState *cenv)
664 return kvm_arch_qemu_init_env(cenv);
667 int kvm_update_debugger(CPUState *env)
669 struct kvm_debug_guest dbg;
670 int i;
672 dbg.enabled = 0;
673 if (env->nb_breakpoints || env->singlestep_enabled) {
674 dbg.enabled = 1;
675 for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
676 dbg.breakpoints[i].enabled = 1;
677 dbg.breakpoints[i].address = env->breakpoints[i];
679 dbg.singlestep = env->singlestep_enabled;
681 return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
686 * dirty pages logging
688 /* FIXME: use unsigned long pointer instead of unsigned char */
689 unsigned char *kvm_dirty_bitmap = NULL;
690 int kvm_physical_memory_set_dirty_tracking(int enable)
692 int r = 0;
694 if (!kvm_enabled())
695 return 0;
697 if (enable) {
698 if (!kvm_dirty_bitmap) {
699 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
700 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
701 if (kvm_dirty_bitmap == NULL) {
702 perror("Failed to allocate dirty pages bitmap");
703 r=-1;
705 else {
706 r = kvm_dirty_pages_log_enable_all(kvm_context);
710 else {
711 if (kvm_dirty_bitmap) {
712 r = kvm_dirty_pages_log_reset(kvm_context);
713 qemu_free(kvm_dirty_bitmap);
714 kvm_dirty_bitmap = NULL;
717 return r;
720 /* get kvm's dirty pages bitmap and update qemu's */
721 int kvm_get_dirty_pages_log_range(unsigned long start_addr,
722 unsigned char *bitmap,
723 unsigned int offset,
724 unsigned long mem_size)
726 unsigned int i, j, n=0;
727 unsigned char c;
728 unsigned page_number, addr, addr1;
729 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
732 * bitmap-traveling is faster than memory-traveling (for addr...)
733 * especially when most of the memory is not dirty.
735 for (i=0; i<len; i++) {
736 c = bitmap[i];
737 while (c>0) {
738 j = ffsl(c) - 1;
739 c &= ~(1u<<j);
740 page_number = i * 8 + j;
741 addr1 = page_number * TARGET_PAGE_SIZE;
742 addr = offset + addr1;
743 cpu_physical_memory_set_dirty(addr);
744 n++;
747 return 0;
/*
 * Callback handed to kvm_get_dirty_pages_range(): merge one range's dirty
 * bitmap into qemu's, using 'start' as both range start and address offset.
 */
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    unsigned char *bits = bitmap;

    return kvm_get_dirty_pages_log_range(start, bits, start, len);
}
756 * get kvm's dirty pages bitmap and update qemu's
757 * we only care about physical ram, which resides in slots 0 and 3
759 int kvm_update_dirty_pages_log(void)
761 int r = 0;
764 r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
765 kvm_dirty_bitmap, NULL,
766 kvm_get_dirty_bitmap_cb);
767 return r;
770 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
772 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
773 unsigned int brsize = BITMAP_SIZE(ram_size);
774 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
775 unsigned int extra_bytes = (extra_pages +7)/8;
776 unsigned int hole_start = BITMAP_SIZE(0xa0000);
777 unsigned int hole_end = BITMAP_SIZE(0xc0000);
779 memset(bitmap, 0xFF, brsize + extra_bytes);
780 memset(bitmap + hole_start, 0, hole_end - hole_start);
781 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
783 return 0;
#ifdef KVM_CAP_IRQCHIP

/* Set interrupt line 'irq' to 'level' via kvm_set_irq_level(). */
int kvm_set_irq(int irq, int level)
{
    int rc = kvm_set_irq_level(kvm_context, irq, level);

    return rc;
}

#endif
/* Start of an AIO wait; nothing needs to be done here. */
void qemu_kvm_aio_wait_start(void)
{
    /* intentionally empty */
}
799 void qemu_kvm_aio_wait(void)
801 CPUState *cpu_single = cpu_single_env;
803 if (!cpu_single_env) {
804 pthread_mutex_unlock(&qemu_mutex);
805 kvm_eat_signal(&io_signal_table, NULL, 1000);
806 pthread_mutex_lock(&qemu_mutex);
807 cpu_single_env = NULL;
808 } else {
809 pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
810 cpu_single_env = cpu_single;
/* End of an AIO wait; nothing needs to be done here. */
void qemu_kvm_aio_wait_end(void)
{
    /* intentionally empty */
}
818 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
820 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
823 void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
824 unsigned long size, int log, int writable)
826 return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
829 void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
830 unsigned long size)
832 kvm_destroy_phys_mem(kvm_context, start_addr, size);