/*
 * qemu/kvm integration
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 *
 * Licensed under the terms of the GNU GPL version 2 or higher.
 */
#include "config.h"
#include "config-host.h"

int kvm_allowed = 1;
int kvm_irqchip = 1;

#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
#include "hw/hw.h"
#include "sysemu.h"

#include "qemu-kvm.h"
#include <libkvm.h>
#include <pthread.h>
#include <sys/utsname.h>

kvm_context_t kvm_context;

extern int smp_cpus;

pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
__thread struct vcpu_info *vcpu;

struct qemu_kvm_signal_table {
    sigset_t sigset;
    sigset_t negsigset;
};

static struct qemu_kvm_signal_table io_signal_table;

#define SIG_IPI (SIGRTMIN+4)

struct vcpu_info {
    CPUState *env;
    int sipi_needed;
    int init;
    pthread_t thread;
    int signalled;
    int stop;
    int stopped;
} vcpu_info[4];

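/*
 * Note on the scheme above: there is one vcpu_info slot per guest cpu (the
 * static array caps this build at 4 vcpus).  Cross-thread requests work by
 * setting a flag in the target's slot and then kicking its thread with
 * SIG_IPI via pthread_kill(); the `signalled` flag debounces the kick so a
 * vcpu is not signalled twice for the same pending event.
 */
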
CPUState *qemu_kvm_cpu_env(int index)
{
    return vcpu_info[index].env;
}

/* SIG_IPI only exists to interrupt a vcpu blocked in the kernel; the
 * handler itself has nothing to do. */
static void sig_ipi_handler(int n)
{
}

void kvm_update_interrupt_request(CPUState *env)
{
    if (env && vcpu && env != vcpu->env) {
        if (vcpu_info[env->cpu_index].signalled)
            return;
        vcpu_info[env->cpu_index].signalled = 1;
        if (vcpu_info[env->cpu_index].thread)
            pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);
    }
}

void kvm_update_after_sipi(CPUState *env)
{
    vcpu_info[env->cpu_index].sipi_needed = 1;
    kvm_update_interrupt_request(env);
}

void kvm_apic_init(CPUState *env)
{
    if (env->cpu_index != 0)
        vcpu_info[env->cpu_index].init = 1;
    kvm_update_interrupt_request(env);
}

static int try_push_interrupts(void *opaque)
{
    return kvm_arch_try_push_interrupts(opaque);
}

static void post_kvm_run(void *opaque, int vcpu)
{
    pthread_mutex_lock(&qemu_mutex);
    kvm_arch_post_kvm_run(opaque, vcpu);
}

static int pre_kvm_run(void *opaque, int vcpu)
{
    CPUState *env = cpu_single_env;

    kvm_arch_pre_kvm_run(opaque, vcpu);

    if (env->interrupt_request & CPU_INTERRUPT_EXIT)
        return 1;
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}

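/*
 * Locking model, as far as this file shows it: qemu_mutex is held whenever
 * a thread executes qemu code, and is dropped in pre_kvm_run() just before
 * the vcpu enters the kernel through kvm_run(); post_kvm_run() reacquires
 * it on exit.  Returning 1 from pre_kvm_run() aborts entry into the guest
 * when an exit request is already pending.
 */
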
void kvm_load_registers(CPUState *env)
{
    if (kvm_enabled())
        kvm_arch_load_regs(env);
}

void kvm_save_registers(CPUState *env)
{
    if (kvm_enabled())
        kvm_arch_save_regs(env);
}

int kvm_cpu_exec(CPUState *env)
{
    int r;

    r = kvm_run(kvm_context, env->cpu_index);
    if (r < 0) {
        printf("kvm_run returned %d\n", r);
        exit(1);
    }

    return 0;
}

extern int vm_running;

static int has_work(CPUState *env)
{
    if (!vm_running)
        return 0;
    if (!(env->hflags & HF_HALTED_MASK))
        return 1;
    return kvm_arch_has_work(env);
}

static int kvm_eat_signal(CPUState *env, int timeout)
{
    struct timespec ts;
    int r, e, ret = 0;
    siginfo_t siginfo;
    struct sigaction sa;

    ts.tv_sec = timeout / 1000;
    ts.tv_nsec = (timeout % 1000) * 1000000;
    r = sigtimedwait(&io_signal_table.sigset, &siginfo, &ts);
    if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
        return 0;
    e = errno;
    pthread_mutex_lock(&qemu_mutex);
    if (vcpu)
        cpu_single_env = vcpu->env;
    /* test the saved errno: pthread_mutex_lock() may have clobbered it */
    if (r == -1 && !(e == EAGAIN || e == EINTR)) {
        printf("sigtimedwait: %s\n", strerror(e));
        exit(1);
    }
    if (r != -1) {
        sigaction(siginfo.si_signo, NULL, &sa);
        sa.sa_handler(siginfo.si_signo);
        if (siginfo.si_signo == SIGUSR2)
            pthread_cond_signal(&qemu_aio_cond);
        ret = 1;
    }
    pthread_mutex_unlock(&qemu_mutex);

    return ret;
}

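/*
 * Sketch of the dispatch above: the io signals are blocked in every thread,
 * so sigtimedwait() is the only place they are consumed.  Once a signal
 * arrives, its registered handler is looked up with sigaction() and invoked
 * by hand under qemu_mutex, which keeps all handlers single-threaded;
 * SIGUSR2 additionally wakes any vcpu thread parked in qemu_kvm_aio_wait().
 */
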
static void kvm_eat_signals(CPUState *env, int timeout)
{
    int r = 0;

    while (kvm_eat_signal(env, 0))
        r = 1;
    if (!r && timeout) {
        r = kvm_eat_signal(env, timeout);
        if (r)
            while (kvm_eat_signal(env, 0))
                ;
    }
    /*
     * we call select() even if no signal was received, to account for
     * file descriptors for which there is no signal handler installed.
     */
    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = vcpu->env;
    main_loop_wait(0);
    pthread_mutex_unlock(&qemu_mutex);
}

static void kvm_main_loop_wait(CPUState *env, int timeout)
{
    pthread_mutex_unlock(&qemu_mutex);
    if (env->cpu_index == 0)
        kvm_eat_signals(env, timeout);
    else {
        if (!kvm_irqchip_in_kernel(kvm_context) &&
            (timeout || vcpu_info[env->cpu_index].stopped)) {
            sigset_t set;
            int n;

        paused:
            sigemptyset(&set);
            sigaddset(&set, SIG_IPI);
            sigwait(&set, &n);
        } else {
            struct timespec ts;
            siginfo_t siginfo;
            sigset_t set;

            ts.tv_sec = 0;
            ts.tv_nsec = 0;
            sigemptyset(&set);
            sigaddset(&set, SIG_IPI);
            sigtimedwait(&set, &siginfo, &ts);
        }
        if (vcpu_info[env->cpu_index].stop) {
            vcpu_info[env->cpu_index].stop = 0;
            vcpu_info[env->cpu_index].stopped = 1;
            pthread_kill(vcpu_info[0].thread, SIG_IPI);
            goto paused;
        }
    }
    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = env;
    vcpu_info[env->cpu_index].signalled = 0;
}

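/*
 * Pause handshake, cpu 0 driving: pause_other_threads() sets ->stop and
 * kicks each AP; the AP notices the flag here, flips stop to stopped, kicks
 * cpu 0 back so its kvm_eat_signals() loop can re-poll all_threads_paused(),
 * and then parks in sigwait(SIG_IPI) until resume_other_threads() kicks it
 * once more.
 */
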
static int all_threads_paused(void)
{
    int i;

    for (i = 1; i < smp_cpus; ++i)
        if (!vcpu_info[i].stopped)
            return 0;
    return 1;
}

static void pause_other_threads(void)
{
    int i;

    for (i = 1; i < smp_cpus; ++i) {
        vcpu_info[i].stop = 1;
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
    }
    while (!all_threads_paused())
        kvm_eat_signals(vcpu->env, 0);
}

static void resume_other_threads(void)
{
    int i;

    for (i = 1; i < smp_cpus; ++i) {
        vcpu_info[i].stop = 0;
        vcpu_info[i].stopped = 0;
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
    }
}

static void kvm_vm_state_change_handler(void *context, int running)
{
    if (running)
        resume_other_threads();
    else
        pause_other_threads();
}

static void update_regs_for_sipi(CPUState *env)
{
    kvm_arch_update_regs_for_sipi(env);
    vcpu_info[env->cpu_index].sipi_needed = 0;
    vcpu_info[env->cpu_index].init = 0;
}

static void update_regs_for_init(CPUState *env)
{
    cpu_reset(env);
    kvm_arch_load_regs(env);
}

static void setup_kernel_sigmask(CPUState *env)
{
    sigset_t set;

    sigprocmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    if (env->cpu_index == 0)
        sigandset(&set, &set, &io_signal_table.negsigset);

    kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
}

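/*
 * The mask computed here is installed through kvm_set_signal_mask() (the
 * KVM_SET_SIGNAL_MASK ioctl underneath), so it applies only while the vcpu
 * sits inside KVM_RUN: SIG_IPI is unblocked there so a kick forces an exit,
 * and on cpu 0 the io signals are unblocked as well, since cpu 0 doubles as
 * the io thread.
 */
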
static int kvm_main_loop_cpu(CPUState *env)
{
    struct vcpu_info *info = &vcpu_info[env->cpu_index];

    setup_kernel_sigmask(env);
    pthread_mutex_lock(&qemu_mutex);

    kvm_qemu_init_env(env);
    env->ready_for_interrupt_injection = 1;
#ifdef TARGET_I386
    kvm_tpr_vcpu_start(env);
#endif

    cpu_single_env = env;
    while (1) {
        while (!has_work(env))
            kvm_main_loop_wait(env, 10);
        if (env->interrupt_request & CPU_INTERRUPT_HARD)
            env->hflags &= ~HF_HALTED_MASK;
        if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
            update_regs_for_sipi(env);
        if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
            update_regs_for_init(env);
        if (!(env->hflags & HF_HALTED_MASK) && !info->init)
            kvm_cpu_exec(env);
        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
        kvm_main_loop_wait(env, 0);
        if (qemu_shutdown_requested())
            break;
        else if (qemu_powerdown_requested())
            qemu_system_powerdown();
        else if (qemu_reset_requested()) {
            env->interrupt_request = 0;
            qemu_system_reset();
            kvm_arch_load_regs(env);
        }
    }
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}

static void *ap_main_loop(void *_env)
{
    CPUState *env = _env;
    sigset_t signals;

    vcpu = &vcpu_info[env->cpu_index];
    vcpu->env = env;
    sigfillset(&signals);
    //sigdelset(&signals, SIG_IPI);
    sigprocmask(SIG_BLOCK, &signals, NULL);
    kvm_create_vcpu(kvm_context, env->cpu_index);
    kvm_qemu_init_env(env);
    if (kvm_irqchip_in_kernel(kvm_context))
        env->hflags &= ~HF_HALTED_MASK;
    kvm_main_loop_cpu(env);
    return NULL;
}

static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
{
    sigemptyset(&sigtab->sigset);
    sigfillset(&sigtab->negsigset);
}

static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
{
    sigaddset(&sigtab->sigset, signum);
    sigdelset(&sigtab->negsigset, signum);
}

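/*
 * sigset and negsigset are maintained as exact complements: sigset is what
 * kvm_eat_signal() waits on with sigtimedwait(), while negsigset is AND-ed
 * into a vcpu's kernel-side mask in setup_kernel_sigmask() so the same
 * signals stay blocked while that vcpu runs guest code.
 */
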
int kvm_init_ap(void)
{
    CPUState *env = first_cpu->next_cpu;
    int i;

#ifdef TARGET_I386
    kvm_tpr_opt_setup();
#endif
    qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
    qemu_kvm_init_signal_table(&io_signal_table);
    kvm_add_signal(&io_signal_table, SIGIO);
    kvm_add_signal(&io_signal_table, SIGALRM);
    kvm_add_signal(&io_signal_table, SIGUSR2);
    kvm_add_signal(&io_signal_table, SIG_IPI);
    sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);

    vcpu = &vcpu_info[0];
    vcpu->env = first_cpu;
    signal(SIG_IPI, sig_ipi_handler);
    for (i = 1; i < smp_cpus; ++i) {
        pthread_create(&vcpu_info[i].thread, NULL, ap_main_loop, env);
        env = env->next_cpu;
    }
    return 0;
}

int kvm_main_loop(void)
{
    vcpu_info[0].thread = pthread_self();
    pthread_mutex_unlock(&qemu_mutex);
    return kvm_main_loop_cpu(first_cpu);
}

static int kvm_debug(void *opaque, int vcpu)
{
    CPUState *env = cpu_single_env;

    env->exception_index = EXCP_DEBUG;
    return 1;
}

static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    *data = cpu_inb(0, addr);
    return 0;
}

static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    *data = cpu_inw(0, addr);
    return 0;
}

static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    *data = cpu_inl(0, addr);
    return 0;
}

#define PM_IO_BASE 0xb000

static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    if (addr == 0xb2) {
        switch (data) {
        case 0: {
            cpu_outb(0, 0xb3, 0);
            break;
        }
        case 0xf0: {
            unsigned x;

            /* disable acpi */
            x = cpu_inw(0, PM_IO_BASE + 4);
            x &= ~1;
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        case 0xf1: {
            unsigned x;

            /* enable acpi */
            x = cpu_inw(0, PM_IO_BASE + 4);
            x |= 1;
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        default:
            break;
        }
        return 0;
    }
    cpu_outb(0, addr, data);
    return 0;
}

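/*
 * Background, to the extent the code shows it: 0xb2 is the chipset's SMI
 * command port.  Writes of 0xf1 and 0xf0 set or clear bit 0 (SCI_EN) of the
 * PM1 control register at PM_IO_BASE + 4, switching between ACPI and legacy
 * power management; this emulates in userspace what the BIOS SMI handler
 * would do on real hardware.
 */
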
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);
    return 0;
}

static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);
    return 0;
}

static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 0);
    return 0;
}

static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 1);
    return 0;
}

static int kvm_io_window(void *opaque)
{
    return 1;
}

static int kvm_halt(void *opaque, int vcpu)
{
    return kvm_arch_halt(opaque, vcpu);
}

/* a shutdown exit from the guest (e.g. a triple fault) is handled by
 * requesting a system reset */
static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();
    return 1;
}

static struct kvm_callbacks qemu_kvm_ops = {
    .debug = kvm_debug,
    .inb   = kvm_inb,
    .inw   = kvm_inw,
    .inl   = kvm_inl,
    .outb  = kvm_outb,
    .outw  = kvm_outw,
    .outl  = kvm_outl,
    .mmio_read = kvm_mmio_read,
    .mmio_write = kvm_mmio_write,
    .halt  = kvm_halt,
    .shutdown = kvm_shutdown,
    .io_window = kvm_io_window,
    .try_push_interrupts = try_push_interrupts,
    .post_kvm_run = post_kvm_run,
    .pre_kvm_run = pre_kvm_run,
#ifdef TARGET_I386
    .tpr_access = handle_tpr_access,
#endif
#ifdef TARGET_PPC
    .powerpc_dcr_read = handle_powerpc_dcr_read,
    .powerpc_dcr_write = handle_powerpc_dcr_write,
#endif
};

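/*
 * libkvm drives everything through this callback table: each KVM_RUN exit
 * reason (pio, mmio, halt, shutdown, debug, and so on) is routed to the
 * matching hook, which bridges back into qemu's port io and physical memory
 * helpers above.
 */
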
int kvm_qemu_init(void)
{
    /* Try to initialize kvm */
    kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
    if (!kvm_context) {
        return -1;
    }
    pthread_mutex_lock(&qemu_mutex);

    return 0;
}

int kvm_qemu_create_context(void)
{
    int r;

    if (!kvm_irqchip) {
        kvm_disable_irqchip_creation(kvm_context);
    }
    if (kvm_create(kvm_context, phys_ram_size, (void **)&phys_ram_base) < 0) {
        kvm_qemu_destroy();
        return -1;
    }
    r = kvm_arch_qemu_create_context();
    if (r < 0) {
        kvm_qemu_destroy();
        return -1;
    }
    return 0;
}

void kvm_qemu_destroy(void)
{
    kvm_finalize(kvm_context);
}

void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
                                      unsigned long size,
                                      unsigned long phys_offset)
{
#ifdef KVM_CAP_USER_MEMORY
    int r = 0;

    r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
    if (r) {
        if (!(phys_offset & ~TARGET_PAGE_MASK)) {
            r = kvm_is_allocated_mem(kvm_context, start_addr, size);
            if (r)
                return;
            r = kvm_is_intersecting_mem(kvm_context, start_addr);
            if (r)
                kvm_create_mem_hole(kvm_context, start_addr, size);
            r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
                                                phys_ram_base + phys_offset,
                                                size, 0);
        }
        if (phys_offset & IO_MEM_ROM) {
            phys_offset &= ~IO_MEM_ROM;
            r = kvm_is_intersecting_mem(kvm_context, start_addr);
            if (r)
                kvm_create_mem_hole(kvm_context, start_addr, size);
            r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
                                                phys_ram_base + phys_offset,
                                                size, 0);
        }
        if (r < 0) {
            printf("kvm_cpu_register_physical_memory: failed\n");
            exit(1);
        }
        return;
    }
#endif
    if (phys_offset & IO_MEM_ROM) {
        phys_offset &= ~IO_MEM_ROM;
        memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
    }
}

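/*
 * Two registration strategies, chosen at runtime: with KVM_CAP_USER_MEMORY
 * the region is mapped into the guest directly from qemu's phys_ram_base
 * (punching a hole first if it overlaps an existing slot, and re-registering
 * ROM by its real offset); without the capability, ROM contents are simply
 * copied to their guest-physical location in the kernel-allocated memory.
 */
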
int kvm_qemu_check_extension(int ext)
{
    return kvm_check_extension(kvm_context, ext);
}

int kvm_qemu_init_env(CPUState *cenv)
{
    return kvm_arch_qemu_init_env(cenv);
}

int kvm_update_debugger(CPUState *env)
{
    struct kvm_debug_guest dbg;
    int i;

    dbg.enabled = 0;
    if (env->nb_breakpoints || env->singlestep_enabled) {
        dbg.enabled = 1;
        for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
            dbg.breakpoints[i].enabled = 1;
            dbg.breakpoints[i].address = env->breakpoints[i];
        }
        dbg.singlestep = env->singlestep_enabled;
    }
    return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
}

/*
 * dirty pages logging
 */
/* FIXME: use unsigned long pointer instead of unsigned char */
unsigned char *kvm_dirty_bitmap = NULL;

int kvm_physical_memory_set_dirty_tracking(int enable)
{
    int r = 0;

    if (!kvm_enabled())
        return 0;

    if (enable) {
        if (!kvm_dirty_bitmap) {
            unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
            kvm_dirty_bitmap = qemu_malloc(bitmap_size);
            if (kvm_dirty_bitmap == NULL) {
                perror("Failed to allocate dirty pages bitmap");
                r = -1;
            } else {
                r = kvm_dirty_pages_log_enable_all(kvm_context);
            }
        }
    } else {
        if (kvm_dirty_bitmap) {
            r = kvm_dirty_pages_log_reset(kvm_context);
            qemu_free(kvm_dirty_bitmap);
            kvm_dirty_bitmap = NULL;
        }
    }
    return r;
}

/* get kvm's dirty pages bitmap and update qemu's */
int kvm_get_dirty_pages_log_range(unsigned long start_addr,
                                  unsigned char *bitmap,
                                  unsigned int offset,
                                  unsigned long mem_size)
{
    unsigned int i, j, n = 0;
    unsigned char c;
    unsigned page_number, addr, addr1;
    unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + 7) / 8;

    /*
     * bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty.
     */
    for (i = 0; i < len; i++) {
        c = bitmap[i];
        while (c > 0) {
            j = ffsl(c) - 1;
            c &= ~(1u << j);
            page_number = i * 8 + j;
            addr1 = page_number * TARGET_PAGE_SIZE;
            addr = offset + addr1;
            cpu_physical_memory_set_dirty(addr);
            n++;
        }
    }
    return 0;
}

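/*
 * Worked example (illustrative, assuming TARGET_PAGE_SIZE == 4096): if
 * bitmap[2] == 0x05, ffsl() finds bits 0 and 2, giving page numbers
 * 2*8+0 = 16 and 2*8+2 = 18, so the pages at offset + 0x10000 and
 * offset + 0x12000 are marked dirty.  Bytes that are zero are skipped in
 * one compare each, which is what the "bitmap-traveling" comment above
 * refers to.
 */
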
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}

/*
 * get kvm's dirty pages bitmap and update qemu's
 * we only care about physical ram, which resides in slots 0 and 3
 */
int kvm_update_dirty_pages_log(void)
{
    int r = 0;

    r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
                                  kvm_dirty_bitmap, NULL,
                                  kvm_get_dirty_bitmap_cb);
    return r;
}

int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
{
    unsigned int bsize = BITMAP_SIZE(phys_ram_size);
    unsigned int brsize = BITMAP_SIZE(ram_size);
    unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
    unsigned int extra_bytes = (extra_pages + 7) / 8;
    unsigned int hole_start = BITMAP_SIZE(0xa0000);
    unsigned int hole_end = BITMAP_SIZE(0xc0000);

    memset(bitmap, 0xFF, brsize + extra_bytes);
    memset(bitmap + hole_start, 0, hole_end - hole_start);
    memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);

    return 0;
}

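/*
 * Layout assumed above: guest RAM up to ram_size plus the extra_pages of
 * qemu-internal allocations above it are marked present (0xFF), the legacy
 * VGA hole at 0xa0000-0xc0000 is punched out since it is not ordinary RAM,
 * and any rounding slack at the tail of the bitmap is cleared.
 */
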
#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq(int irq, int level)
{
    return kvm_set_irq_level(kvm_context, irq, level);
}

#endif

void qemu_kvm_aio_wait_start(void)
{
}

void qemu_kvm_aio_wait(void)
{
    if (!cpu_single_env || cpu_single_env->cpu_index == 0) {
        pthread_mutex_unlock(&qemu_mutex);
        kvm_eat_signal(cpu_single_env, 1000);
        pthread_mutex_lock(&qemu_mutex);
    } else {
        pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
    }
}

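/*
 * AIO completion is signalled with SIGUSR2: cpu 0, acting as the io thread,
 * waits by eating io signals directly, so the SIGUSR2 handler runs and
 * kvm_eat_signal() then signals qemu_aio_cond; any other vcpu thread simply
 * sleeps on that condition variable under qemu_mutex.
 */
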
void qemu_kvm_aio_wait_end(void)
{
}

int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
{
    return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
}

void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
                              unsigned long size, int log, int writable)
{
    return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
}

void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
                              unsigned long size)
{
    kvm_destroy_phys_mem(kvm_context, start_addr, size);
}