/*
 * qemu/kvm integration
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 *
 * Licensed under the terms of the GNU GPL version 2 or higher.
 */
#include "config.h"
#include "config-host.h"

int kvm_allowed = 1;
int kvm_irqchip = 1;
int kvm_pit = 1;

#include <string.h>
#include "hw/hw.h"
#include "sysemu.h"

#include "qemu-kvm.h"
#include <libkvm.h>
#include <pthread.h>
#include <sys/utsname.h>
#include <sys/syscall.h>
extern void perror(const char *s);

kvm_context_t kvm_context;

extern int smp_cpus;

pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
__thread struct vcpu_info *vcpu;
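/*
 * Note: each vcpu runs in its own thread; the __thread pointer above names
 * the current thread's slot. The signal table below holds the set of
 * signals the I/O path waits for, plus its complement (negsigset), which
 * setup_kernel_sigmask() uses to build vcpu 0's in-kernel signal mask.
 */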
struct qemu_kvm_signal_table {
    sigset_t sigset;
    sigset_t negsigset;
};

static struct qemu_kvm_signal_table io_signal_table;

#define SIG_IPI (SIGRTMIN+4)
struct vcpu_info {
    CPUState *env;
    int sipi_needed;
    int init;
    pthread_t thread;
    int signalled;
    int stop;
    int stopped;
} vcpu_info[256];
static inline unsigned long kvm_get_thread_id(void)
{
    return syscall(SYS_gettid);
}

CPUState *qemu_kvm_cpu_env(int index)
{
    return vcpu_info[index].env;
}
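/*
 * The IPI handler body is intentionally empty: the signal's only job is to
 * kick the vcpu thread out of KVM_RUN (or sigtimedwait) so it re-evaluates
 * its interrupt_request state.
 */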
static void sig_ipi_handler(int n)
{
}
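/*
 * Kick a remote vcpu thread so it notices a newly raised interrupt. The
 * 'signalled' flag coalesces kicks: one SIG_IPI per wakeup is enough, and
 * the flag is cleared again in kvm_main_loop_wait().
 */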
void kvm_update_interrupt_request(CPUState *env)
{
    if (env && vcpu && env != vcpu->env) {
        if (vcpu_info[env->cpu_index].signalled)
            return;
        vcpu_info[env->cpu_index].signalled = 1;
        if (vcpu_info[env->cpu_index].thread)
            pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);
    }
}
void kvm_update_after_sipi(CPUState *env)
{
    vcpu_info[env->cpu_index].sipi_needed = 1;
    kvm_update_interrupt_request(env);
}

void kvm_apic_init(CPUState *env)
{
    if (env->cpu_index != 0)
        vcpu_info[env->cpu_index].init = 1;
    kvm_update_interrupt_request(env);
}
#include <signal.h>

static int try_push_interrupts(void *opaque)
{
    return kvm_arch_try_push_interrupts(opaque);
}
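/*
 * qemu_mutex is held whenever qemu device state is touched. pre_kvm_run()
 * drops it just before entering KVM_RUN and post_kvm_run() retakes it on
 * exit, so the kernel runs the guest without the global lock held.
 */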
static void post_kvm_run(void *opaque, int vcpu)
{
    pthread_mutex_lock(&qemu_mutex);
    kvm_arch_post_kvm_run(opaque, vcpu);
}

static int pre_kvm_run(void *opaque, int vcpu)
{
    CPUState *env = cpu_single_env;

    kvm_arch_pre_kvm_run(opaque, vcpu);

    if (env->interrupt_request & CPU_INTERRUPT_EXIT)
        return 1;
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}
void kvm_load_registers(CPUState *env)
{
    if (kvm_enabled())
        kvm_arch_load_regs(env);
}

void kvm_save_registers(CPUState *env)
{
    if (kvm_enabled())
        kvm_arch_save_regs(env);
}
int kvm_cpu_exec(CPUState *env)
{
    int r;

    r = kvm_run(kvm_context, env->cpu_index);
    if (r < 0) {
        printf("kvm_run returned %d\n", r);
        exit(1);
    }

    return 0;
}
extern int vm_running;

static int has_work(CPUState *env)
{
    if (!vm_running || (env && vcpu_info[env->cpu_index].stopped))
        return 0;
    if (!(env->hflags & HF_HALTED_MASK))
        return 1;
    return kvm_arch_has_work(env);
}
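/*
 * Wait up to 'timeout' ms for one of the I/O signals (SIGIO, SIGALRM,
 * SIGUSR2, SIG_IPI) with sigtimedwait(), then dispatch it by hand to the
 * handler that sigaction() reports. Returns 1 if a signal was consumed,
 * 0 on timeout.
 */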
static int kvm_eat_signal(CPUState *env, int timeout)
{
    struct timespec ts;
    int r, e, ret = 0;
    siginfo_t siginfo;
    struct sigaction sa;

    ts.tv_sec = timeout / 1000;
    ts.tv_nsec = (timeout % 1000) * 1000000;
    r = sigtimedwait(&io_signal_table.sigset, &siginfo, &ts);
    if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
        return 0;
    e = errno;
    pthread_mutex_lock(&qemu_mutex);
    if (vcpu)
        cpu_single_env = vcpu->env;
    /* test the saved errno; pthread_mutex_lock() may have clobbered errno */
    if (r == -1 && !(e == EAGAIN || e == EINTR)) {
        printf("sigtimedwait: %s\n", strerror(e));
        exit(1);
    }
    if (r != -1) {
        sigaction(siginfo.si_signo, NULL, &sa);
        sa.sa_handler(siginfo.si_signo);
        if (siginfo.si_signo == SIGUSR2)
            pthread_cond_signal(&qemu_aio_cond);
        ret = 1;
    }
    if (env && vcpu_info[env->cpu_index].stop) {
        vcpu_info[env->cpu_index].stop = 0;
        vcpu_info[env->cpu_index].stopped = 1;
        pthread_kill(vcpu_info[0].thread, SIG_IPI);
    }
    pthread_mutex_unlock(&qemu_mutex);

    return ret;
}
static void kvm_eat_signals(CPUState *env, int timeout)
{
    int r = 0;

    while (kvm_eat_signal(env, 0))
        r = 1;
    if (!r && timeout) {
        r = kvm_eat_signal(env, timeout);
        if (r)
            while (kvm_eat_signal(env, 0))
                ;
    }
    /*
     * we call select() even if no signal was received, to account for
     * events for which there is no signal handler installed.
     */
    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = vcpu->env;
    if (env->cpu_index == 0)
        main_loop_wait(0);
    pthread_mutex_unlock(&qemu_mutex);
}
static void kvm_main_loop_wait(CPUState *env, int timeout)
{
    pthread_mutex_unlock(&qemu_mutex);
    kvm_eat_signals(env, timeout);
    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = env;
    vcpu_info[env->cpu_index].signalled = 0;
}
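/*
 * vcpu pause protocol: the I/O thread sets 'stop' and sends SIG_IPI; the
 * target thread notices the flag in kvm_eat_signal(), flips it to
 * 'stopped' and kicks vcpu 0 back, which polls all_threads_paused().
 */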
static int all_threads_paused(void)
{
    int i;

    /* a still-pending stop request means that thread has not paused yet */
    for (i = 1; i < smp_cpus; ++i)
        if (vcpu_info[i].stop)
            return 0;
    return 1;
}
static void pause_other_threads(void)
{
    int i;

    for (i = 1; i < smp_cpus; ++i) {
        vcpu_info[i].stop = 1;
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
    }
    while (!all_threads_paused())
        kvm_eat_signals(vcpu->env, 0);
}
static void resume_other_threads(void)
{
    int i;

    for (i = 1; i < smp_cpus; ++i) {
        vcpu_info[i].stop = 0;
        vcpu_info[i].stopped = 0;
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
    }
}
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (running)
        resume_other_threads();
    else
        pause_other_threads();
}
static void update_regs_for_sipi(CPUState *env)
{
    kvm_arch_update_regs_for_sipi(env);
    vcpu_info[env->cpu_index].sipi_needed = 0;
    vcpu_info[env->cpu_index].init = 0;
}

static void update_regs_for_init(CPUState *env)
{
    cpu_reset(env);
    kvm_arch_load_regs(env);
}
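/*
 * Build the signal mask the kernel applies while this vcpu sits inside
 * KVM_RUN: SIG_IPI must always be deliverable so a kick exits the guest;
 * vcpu 0 additionally unblocks the I/O signals it services itself (the
 * sigandset() with negsigset clears them from the blocked set).
 */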
static void setup_kernel_sigmask(CPUState *env)
{
    sigset_t set;

    sigprocmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    if (env->cpu_index == 0)
        sigandset(&set, &set, &io_signal_table.negsigset);

    kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
}
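/*
 * Per-vcpu execution loop: wait until there is work (an interrupt, or the
 * cpu leaving halt), service SIPI/INIT when the irqchip is emulated in
 * userspace, run the guest, then handle shutdown/powerdown/reset requests.
 */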
static int kvm_main_loop_cpu(CPUState *env)
{
    struct vcpu_info *info = &vcpu_info[env->cpu_index];

    setup_kernel_sigmask(env);
    pthread_mutex_lock(&qemu_mutex);

    kvm_qemu_init_env(env);
    env->ready_for_interrupt_injection = 1;
#ifdef TARGET_I386
    kvm_tpr_vcpu_start(env);
#endif

    cpu_single_env = env;
    while (1) {
        while (!has_work(env))
            kvm_main_loop_wait(env, 10);
        if (env->interrupt_request & CPU_INTERRUPT_HARD)
            env->hflags &= ~HF_HALTED_MASK;
        if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
            update_regs_for_sipi(env);
        if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
            update_regs_for_init(env);
        if (!(env->hflags & HF_HALTED_MASK) && !info->init)
            kvm_cpu_exec(env);
        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
        kvm_main_loop_wait(env, 0);
        if (qemu_shutdown_requested())
            break;
        else if (qemu_powerdown_requested())
            qemu_system_powerdown();
        else if (qemu_reset_requested()) {
            env->interrupt_request = 0;
            qemu_system_reset();
            kvm_arch_load_regs(env);
        }
    }
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}
static void *ap_main_loop(void *_env)
{
    CPUState *env = _env;
    sigset_t signals;

    vcpu = &vcpu_info[env->cpu_index];
    vcpu->env = env;
    vcpu->env->thread_id = kvm_get_thread_id();
    sigfillset(&signals);
    //sigdelset(&signals, SIG_IPI);
    sigprocmask(SIG_BLOCK, &signals, NULL);
    kvm_create_vcpu(kvm_context, env->cpu_index);
    kvm_qemu_init_env(env);
    if (kvm_irqchip_in_kernel(kvm_context))
        env->hflags &= ~HF_HALTED_MASK;
    kvm_main_loop_cpu(env);
    return NULL;
}
static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
{
    sigemptyset(&sigtab->sigset);
    sigfillset(&sigtab->negsigset);
}

static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
{
    sigaddset(&sigtab->sigset, signum);
    sigdelset(&sigtab->negsigset, signum);
}
void kvm_init_new_ap(int cpu, CPUState *env)
{
    pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env);
}
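/*
 * Thread setup: vcpu 0 runs in the main (I/O) thread, application
 * processors each get their own pthread. All I/O signals are blocked in
 * every thread and consumed synchronously via sigtimedwait().
 */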
int kvm_init_ap(void)
{
    CPUState *env = first_cpu->next_cpu;
    int i;

#ifdef TARGET_I386
    kvm_tpr_opt_setup();
#endif
    qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
    qemu_kvm_init_signal_table(&io_signal_table);
    kvm_add_signal(&io_signal_table, SIGIO);
    kvm_add_signal(&io_signal_table, SIGALRM);
    kvm_add_signal(&io_signal_table, SIGUSR2);
    kvm_add_signal(&io_signal_table, SIG_IPI);
    sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);

    vcpu = &vcpu_info[0];
    vcpu->env = first_cpu;
    vcpu->env->thread_id = kvm_get_thread_id();
    signal(SIG_IPI, sig_ipi_handler);
    for (i = 1; i < smp_cpus; ++i) {
        kvm_init_new_ap(i, env);
        env = env->next_cpu;
    }
    return 0;
}
int kvm_main_loop(void)
{
    vcpu_info[0].thread = pthread_self();
    pthread_mutex_unlock(&qemu_mutex);
    return kvm_main_loop_cpu(first_cpu);
}
static int kvm_debug(void *opaque, int vcpu)
{
    CPUState *env = cpu_single_env;

    env->exception_index = EXCP_DEBUG;
    return 1;
}
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    *data = cpu_inb(0, addr);
    return 0;
}

static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    *data = cpu_inw(0, addr);
    return 0;
}

static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    *data = cpu_inl(0, addr);
    return 0;
}
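/*
 * Port 0xb2 is the APM/SMI command port; writes of 0xf0/0xf1 emulate the
 * firmware's ACPI disable/enable requests by toggling SCI_EN (bit 0 of the
 * PM1 control register at PM_IO_BASE + 4). Everything else is forwarded
 * to qemu's ioport layer.
 */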
#define PM_IO_BASE 0xb000

static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    if (addr == 0xb2) {
        switch (data) {
        case 0: {
            cpu_outb(0, 0xb3, 0);
            break;
        }
        case 0xf0: {
            unsigned x;

            /* disable acpi (clear SCI_EN) */
            x = cpu_inw(0, PM_IO_BASE + 4);
            x &= ~1;
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        case 0xf1: {
            unsigned x;

            /* enable acpi (set SCI_EN) */
            x = cpu_inw(0, PM_IO_BASE + 4);
            x |= 1;
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        default:
            break;
        }
        return 0;
    }
    cpu_outb(0, addr, data);
    return 0;
}
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);
    return 0;
}

static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);
    return 0;
}

static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 0);
    return 0;
}

static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 1);
    return 0;
}

static int kvm_io_window(void *opaque)
{
    return 1;
}
static int kvm_halt(void *opaque, int vcpu)
{
    return kvm_arch_halt(opaque, vcpu);
}

static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();
    return 1;
}
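/*
 * Callback table handed to libkvm: these hooks are invoked from kvm_run()
 * to emulate port I/O, MMIO and lifecycle events the kernel exits on.
 */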
static struct kvm_callbacks qemu_kvm_ops = {
    .debug = kvm_debug,
    .inb   = kvm_inb,
    .inw   = kvm_inw,
    .inl   = kvm_inl,
    .outb  = kvm_outb,
    .outw  = kvm_outw,
    .outl  = kvm_outl,
    .mmio_read = kvm_mmio_read,
    .mmio_write = kvm_mmio_write,
    .halt  = kvm_halt,
    .shutdown = kvm_shutdown,
    .io_window = kvm_io_window,
    .try_push_interrupts = try_push_interrupts,
    .post_kvm_run = post_kvm_run,
    .pre_kvm_run = pre_kvm_run,
#ifdef TARGET_I386
    .tpr_access = handle_tpr_access,
#endif
#ifdef TARGET_PPC
    .powerpc_dcr_read = handle_powerpc_dcr_read,
    .powerpc_dcr_write = handle_powerpc_dcr_write,
#endif
};
int kvm_qemu_init()
{
    /* Try to initialize kvm */
    kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
    if (!kvm_context) {
        return -1;
    }
    pthread_mutex_lock(&qemu_mutex);

    return 0;
}
int kvm_qemu_create_context(void)
{
    int r;

    if (!kvm_irqchip) {
        kvm_disable_irqchip_creation(kvm_context);
    }
    if (!kvm_pit) {
        kvm_disable_pit_creation(kvm_context);
    }
    if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
        kvm_qemu_destroy();
        return -1;
    }
    r = kvm_arch_qemu_create_context();
    if (r < 0) {
        kvm_qemu_destroy();
        return -1;
    }
    return 0;
}
void kvm_qemu_destroy(void)
{
    kvm_finalize(kvm_context);
}
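/*
 * With KVM_CAP_USER_MEMORY the guest's physical ranges are backed directly
 * by qemu's phys_ram_base allocation: plain RAM slots are registered once,
 * ROM regions are re-registered at their stripped offset, and intersecting
 * slots are punched out with kvm_create_mem_hole() first. Without the
 * capability, ROM contents are simply copied into the preallocated region.
 */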
void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
                                      unsigned long size,
                                      unsigned long phys_offset)
{
#ifdef KVM_CAP_USER_MEMORY
    int r = 0;

    r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
    if (r) {
        if (!(phys_offset & ~TARGET_PAGE_MASK)) {
            r = kvm_is_allocated_mem(kvm_context, start_addr, size);
            if (r)
                return;
            r = kvm_is_intersecting_mem(kvm_context, start_addr);
            if (r)
                kvm_create_mem_hole(kvm_context, start_addr, size);
            r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
                                                phys_ram_base + phys_offset,
                                                size, 0);
        }
        if (phys_offset & IO_MEM_ROM) {
            phys_offset &= ~IO_MEM_ROM;
            r = kvm_is_intersecting_mem(kvm_context, start_addr);
            if (r)
                kvm_create_mem_hole(kvm_context, start_addr, size);
            r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
                                                phys_ram_base + phys_offset,
                                                size, 0);
        }
        if (r < 0) {
            printf("kvm_cpu_register_physical_memory: failed\n");
            exit(1);
        }
        return;
    }
#endif
    if (phys_offset & IO_MEM_ROM) {
        phys_offset &= ~IO_MEM_ROM;
        memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
    }
}
int kvm_qemu_check_extension(int ext)
{
    return kvm_check_extension(kvm_context, ext);
}

int kvm_qemu_init_env(CPUState *cenv)
{
    return kvm_arch_qemu_init_env(cenv);
}
int kvm_update_debugger(CPUState *env)
{
    struct kvm_debug_guest dbg;
    int i;

    dbg.enabled = 0;
    if (env->nb_breakpoints || env->singlestep_enabled) {
        dbg.enabled = 1;
        for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
            dbg.breakpoints[i].enabled = 1;
            dbg.breakpoints[i].address = env->breakpoints[i];
        }
        dbg.singlestep = env->singlestep_enabled;
    }
    return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
}
/*
 * dirty pages logging
 */
/* FIXME: use unsigned long pointer instead of unsigned char */
unsigned char *kvm_dirty_bitmap = NULL;
int kvm_physical_memory_set_dirty_tracking(int enable)
{
    int r = 0;

    if (!kvm_enabled())
        return 0;

    if (enable) {
        if (!kvm_dirty_bitmap) {
            unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
            kvm_dirty_bitmap = qemu_malloc(bitmap_size);
            if (kvm_dirty_bitmap == NULL) {
                perror("Failed to allocate dirty pages bitmap");
                r = -1;
            }
            else {
                r = kvm_dirty_pages_log_enable_all(kvm_context);
            }
        }
    }
    else {
        if (kvm_dirty_bitmap) {
            r = kvm_dirty_pages_log_reset(kvm_context);
            qemu_free(kvm_dirty_bitmap);
            kvm_dirty_bitmap = NULL;
        }
    }
    return r;
}
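/*
 * Each bitmap byte covers eight guest pages; ffsl() pulls out the lowest
 * set bit and clears it, so only dirty pages are visited. That is what
 * makes the bitmap walk below cheap when memory is mostly clean.
 */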
/* get kvm's dirty pages bitmap and update qemu's */
int kvm_get_dirty_pages_log_range(unsigned long start_addr,
                                  unsigned char *bitmap,
                                  unsigned int offset,
                                  unsigned long mem_size)
{
    unsigned int i, j, n = 0;
    unsigned char c;
    unsigned page_number, addr, addr1;
    unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + 7) / 8;

    /*
     * bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty.
     */
    for (i = 0; i < len; i++) {
        c = bitmap[i];
        while (c > 0) {
            j = ffsl(c) - 1;
            c &= ~(1u << j);
            page_number = i * 8 + j;
            addr1 = page_number * TARGET_PAGE_SIZE;
            addr = offset + addr1;
            cpu_physical_memory_set_dirty(addr);
            n++;
        }
    }
    return 0;
}
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}
/*
 * get kvm's dirty pages bitmap and update qemu's
 * we only care about physical ram, which resides in slots 0 and 3
 */
int kvm_update_dirty_pages_log(void)
{
    int r = 0;

    r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
                                  kvm_dirty_bitmap, NULL,
                                  kvm_get_dirty_bitmap_cb);
    return r;
}
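/*
 * Mark every RAM page present except the legacy VGA/BIOS hole at
 * 0xa0000-0xc0000. 'extra_pages' presumably covers the pages qemu
 * allocates above ram_size (e.g. video RAM and BIOS images) at the top
 * of phys_ram_size.
 */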
int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
{
    unsigned int bsize = BITMAP_SIZE(phys_ram_size);
    unsigned int brsize = BITMAP_SIZE(ram_size);
    unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
    unsigned int extra_bytes = (extra_pages + 7) / 8;
    unsigned int hole_start = BITMAP_SIZE(0xa0000);
    unsigned int hole_end = BITMAP_SIZE(0xc0000);

    memset(bitmap, 0xFF, brsize + extra_bytes);
    memset(bitmap + hole_start, 0, hole_end - hole_start);
    memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);

    return 0;
}
#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq(int irq, int level)
{
    return kvm_set_irq_level(kvm_context, irq, level);
}

#endif
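/*
 * AIO completion wait: vcpu 0 (the I/O thread) eats signals directly;
 * other vcpus sleep on qemu_aio_cond, which kvm_eat_signal() signals when
 * a SIGUSR2 aio-completion notification arrives.
 */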
void qemu_kvm_aio_wait_start(void)
{
}

void qemu_kvm_aio_wait(void)
{
    if (!cpu_single_env || cpu_single_env->cpu_index == 0) {
        pthread_mutex_unlock(&qemu_mutex);
        kvm_eat_signal(cpu_single_env, 1000);
        pthread_mutex_lock(&qemu_mutex);
    } else {
        pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
    }
}

void qemu_kvm_aio_wait_end(void)
{
}
int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
{
    return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
}

void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
                              unsigned long size, int log, int writable)
{
    return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
}

void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
                              unsigned long size)
{
    kvm_destroy_phys_mem(kvm_context, start_addr, size);
}