Ppc FPRs no longer live in kvm_vcpu
qemu-kvm/fedora.git / qemu-kvm.c
/*
 * qemu/kvm integration
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 *
 * Licensed under the terms of the GNU GPL version 2 or higher.
 */
#include "config.h"
#include "config-host.h"

int kvm_allowed = 1;
int kvm_irqchip = 1;
int kvm_pit = 1;

#include <string.h>
#include "hw/hw.h"
#include "sysemu.h"

#include "qemu-kvm.h"
#include <libkvm.h>
#include <pthread.h>
#include <sys/utsname.h>
#include <sys/syscall.h>

extern void perror(const char *s);

kvm_context_t kvm_context;

extern int smp_cpus;

static int qemu_kvm_reset_requested;

pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
__thread struct vcpu_info *vcpu;

struct qemu_kvm_signal_table {
    sigset_t sigset;
    sigset_t negsigset;
};

static struct qemu_kvm_signal_table io_signal_table;
static struct qemu_kvm_signal_table vcpu_signal_table;

#define SIG_IPI (SIGRTMIN+4)

struct vcpu_info {
    CPUState *env;
    int sipi_needed;
    int init;
    pthread_t thread;
    int signalled;
    int stop;
    int stopped;
    int reload_regs;
} vcpu_info[256];

pthread_t io_thread;
static inline unsigned long kvm_get_thread_id(void)
{
    return syscall(SYS_gettid);
}

CPUState *qemu_kvm_cpu_env(int index)
{
    return vcpu_info[index].env;
}

static void sig_ipi_handler(int n)
{
}
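/*
 * Prod a vcpu thread with SIG_IPI when an interrupt is raised from the
 * I/O thread or from another vcpu, so it drops out of KVM_RUN and
 * re-evaluates env->interrupt_request.  The ->signalled flag avoids
 * sending the same vcpu redundant signals.
 */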
void kvm_update_interrupt_request(CPUState *env)
{
    int signal = 0;

    if (env) {
        if (!vcpu)
            signal = 1;
        if (vcpu && env != vcpu->env && !vcpu_info[env->cpu_index].signalled)
            signal = 1;

        if (signal) {
            vcpu_info[env->cpu_index].signalled = 1;
            if (vcpu_info[env->cpu_index].thread)
                pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);
        }
    }
}
void kvm_update_after_sipi(CPUState *env)
{
    vcpu_info[env->cpu_index].sipi_needed = 1;
    kvm_update_interrupt_request(env);
}

void kvm_apic_init(CPUState *env)
{
    if (env->cpu_index != 0)
        vcpu_info[env->cpu_index].init = 1;
    kvm_update_interrupt_request(env);
}
#include <signal.h>

static int try_push_interrupts(void *opaque)
{
    return kvm_arch_try_push_interrupts(opaque);
}
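/*
 * qemu_mutex is released just before entering KVM_RUN (pre_kvm_run) and
 * reacquired as soon as the vcpu exits back to userspace (post_kvm_run),
 * so device emulation stays serialized while guest code runs in parallel.
 */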
static void post_kvm_run(void *opaque, int vcpu)
{
    pthread_mutex_lock(&qemu_mutex);
    kvm_arch_post_kvm_run(opaque, vcpu);
}

static int pre_kvm_run(void *opaque, int vcpu)
{
    CPUState *env = qemu_kvm_cpu_env(vcpu);

    kvm_arch_pre_kvm_run(opaque, vcpu);

    if (env->interrupt_request & CPU_INTERRUPT_EXIT)
        return 1;
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}
void kvm_load_registers(CPUState *env)
{
    if (kvm_enabled())
        kvm_arch_load_regs(env);
}

void kvm_save_registers(CPUState *env)
{
    if (kvm_enabled())
        kvm_arch_save_regs(env);
}

int kvm_cpu_exec(CPUState *env)
{
    int r;

    r = kvm_run(kvm_context, env->cpu_index);
    if (r < 0) {
        printf("kvm_run returned %d\n", r);
        exit(1);
    }

    return 0;
}
extern int vm_running;

static int has_work(CPUState *env)
{
    if (!vm_running || (env && vcpu_info[env->cpu_index].stopped))
        return 0;
    if (!(env->hflags & HF_HALTED_MASK))
        return 1;
    return kvm_arch_has_work(env);
}
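/*
 * Dispatch a signal picked up by sigtimedwait(): SIGUSR2 wakes a thread
 * sleeping in qemu_kvm_aio_wait(); timer and I/O signals are forwarded
 * by hand to their registered handlers, since blocked signals are never
 * delivered asynchronously.
 */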
static int kvm_process_signal(int si_signo)
{
    struct sigaction sa;

    switch (si_signo) {
    case SIGUSR2:
        pthread_cond_signal(&qemu_aio_cond);
        break;
    case SIGALRM:
    case SIGIO:
        sigaction(si_signo, NULL, &sa);
        sa.sa_handler(si_signo);
        break;
    }

    return 1;
}
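/*
 * Wait up to @timeout milliseconds for one signal from @waitset and
 * process it under qemu_mutex.  Also completes a pending stop request
 * for @env by flagging it stopped and notifying the I/O thread.
 */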
static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
                          int timeout)
{
    struct timespec ts;
    int r, e, ret = 0;
    siginfo_t siginfo;

    ts.tv_sec = timeout / 1000;
    ts.tv_nsec = (timeout % 1000) * 1000000;
    r = sigtimedwait(&waitset->sigset, &siginfo, &ts);
    if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
        return 0;
    e = errno;
    pthread_mutex_lock(&qemu_mutex);
    if (env && vcpu)
        cpu_single_env = vcpu->env;
    if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
        printf("sigtimedwait: %s\n", strerror(e));
        exit(1);
    }
    if (r != -1)
        ret = kvm_process_signal(siginfo.si_signo);

    if (env && vcpu_info[env->cpu_index].stop) {
        vcpu_info[env->cpu_index].stop = 0;
        vcpu_info[env->cpu_index].stopped = 1;
        pthread_kill(io_thread, SIGUSR1);
    }
    pthread_mutex_unlock(&qemu_mutex);

    return ret;
}
static void kvm_eat_signals(CPUState *env, int timeout)
{
    int r = 0;
    struct qemu_kvm_signal_table *waitset = &vcpu_signal_table;

    while (kvm_eat_signal(waitset, env, 0))
        r = 1;
    if (!r && timeout) {
        r = kvm_eat_signal(waitset, env, timeout);
        if (r)
            while (kvm_eat_signal(waitset, env, 0))
                ;
    }
}
static void kvm_main_loop_wait(CPUState *env, int timeout)
{
    pthread_mutex_unlock(&qemu_mutex);
    kvm_eat_signals(env, timeout);
    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = env;
    vcpu_info[env->cpu_index].signalled = 0;
}
static int all_threads_paused(void)
{
    int i;

    for (i = 0; i < smp_cpus; ++i)
        if (vcpu_info[i].stop)
            return 0;
    return 1;
}
static void pause_all_threads(void)
{
    int i;

    for (i = 0; i < smp_cpus; ++i) {
        vcpu_info[i].stop = 1;
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
    }
    while (!all_threads_paused()) {
        pthread_mutex_unlock(&qemu_mutex);
        kvm_eat_signal(&io_signal_table, NULL, 1000);
        pthread_mutex_lock(&qemu_mutex);
        cpu_single_env = NULL;
    }
}
static void resume_all_threads(void)
{
    int i;

    for (i = 0; i < smp_cpus; ++i) {
        vcpu_info[i].stop = 0;
        vcpu_info[i].stopped = 0;
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
    }
}
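/* Called on vm_start()/vm_stop(): keep the vcpu threads in step with the
 * machine state. */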
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (running)
        resume_all_threads();
    else
        pause_all_threads();
}
static void update_regs_for_sipi(CPUState *env)
{
    kvm_arch_update_regs_for_sipi(env);
    vcpu_info[env->cpu_index].sipi_needed = 0;
    vcpu_info[env->cpu_index].init = 0;
}

static void update_regs_for_init(CPUState *env)
{
    cpu_reset(env);
    kvm_arch_load_regs(env);
}
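/*
 * All signals are blocked in vcpu threads; removing SIG_IPI from the mask
 * installed via kvm_set_signal_mask() means it is deliverable only while
 * the thread sits inside KVM_RUN, where it forces an exit to userspace.
 */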
static void setup_kernel_sigmask(CPUState *env)
{
    sigset_t set;

    sigprocmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
}
void qemu_kvm_system_reset_request(void)
{
    int i;

    for (i = 0; i < smp_cpus; ++i) {
        vcpu_info[i].reload_regs = 1;
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
    }
    qemu_system_reset();
}
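/*
 * Per-vcpu main loop: wait until there is work, fold pending INIT/SIPI
 * state into the registers when the irqchip is emulated in userspace,
 * run the guest, then service signals and reset requests.
 */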
static int kvm_main_loop_cpu(CPUState *env)
{
    struct vcpu_info *info = &vcpu_info[env->cpu_index];

    setup_kernel_sigmask(env);
    pthread_mutex_lock(&qemu_mutex);

    kvm_qemu_init_env(env);
    env->ready_for_interrupt_injection = 1;
#ifdef TARGET_I386
    kvm_tpr_vcpu_start(env);
#endif

    cpu_single_env = env;
    while (1) {
        while (!has_work(env))
            kvm_main_loop_wait(env, 10);
        if (env->interrupt_request & CPU_INTERRUPT_HARD)
            env->hflags &= ~HF_HALTED_MASK;
        if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
            update_regs_for_sipi(env);
        if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
            update_regs_for_init(env);
        if (!(env->hflags & HF_HALTED_MASK) && !info->init)
            kvm_cpu_exec(env);
        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
        kvm_main_loop_wait(env, 0);
        if (info->reload_regs) {
            info->reload_regs = 0;
            if (env->cpu_index == 0) /* ap needs to be placed in INIT */
                kvm_arch_load_regs(env);
        }
    }
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}
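/*
 * Entry point of every vcpu thread: bind the thread-local vcpu pointer,
 * block all signals (kvm_eat_signal() collects them synchronously),
 * create the kernel vcpu and enter the main loop.
 */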
static void *ap_main_loop(void *_env)
{
    CPUState *env = _env;
    sigset_t signals;

    vcpu = &vcpu_info[env->cpu_index];
    vcpu->env = env;
    vcpu->env->thread_id = kvm_get_thread_id();
    sigfillset(&signals);
    sigprocmask(SIG_BLOCK, &signals, NULL);
    kvm_create_vcpu(kvm_context, env->cpu_index);
    kvm_qemu_init_env(env);
    if (kvm_irqchip_in_kernel(kvm_context))
        env->hflags &= ~HF_HALTED_MASK;
    kvm_main_loop_cpu(env);
    return NULL;
}
static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
{
    sigemptyset(&sigtab->sigset);
    sigfillset(&sigtab->negsigset);
}

static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
{
    sigaddset(&sigtab->sigset, signum);
    sigdelset(&sigtab->negsigset, signum);
}
void kvm_init_new_ap(int cpu, CPUState *env)
{
    pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env);
    /* FIXME: wait for thread to spin up */
    usleep(200);
}
static void qemu_kvm_init_signal_tables(void)
{
    qemu_kvm_init_signal_table(&io_signal_table);
    qemu_kvm_init_signal_table(&vcpu_signal_table);

    kvm_add_signal(&io_signal_table, SIGIO);
    kvm_add_signal(&io_signal_table, SIGALRM);
    kvm_add_signal(&io_signal_table, SIGUSR1);
    kvm_add_signal(&io_signal_table, SIGUSR2);

    kvm_add_signal(&vcpu_signal_table, SIG_IPI);

    sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);
}
int kvm_init_ap(void)
{
#ifdef TARGET_I386
    kvm_tpr_opt_setup();
#endif
    qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
    qemu_kvm_init_signal_tables();

    signal(SIG_IPI, sig_ipi_handler);
    return 0;
}
void qemu_kvm_notify_work(void)
{
    if (io_thread)
        pthread_kill(io_thread, SIGUSR1);
}
/*
 * The IO thread has all signals that inform machine events
 * blocked (io_signal_table), so it won't get interrupted
 * while processing in main_loop_wait().
 */

int kvm_main_loop(void)
{
    io_thread = pthread_self();
    pthread_mutex_unlock(&qemu_mutex);
    while (1) {
        kvm_eat_signal(&io_signal_table, NULL, 1000);
        pthread_mutex_lock(&qemu_mutex);
        cpu_single_env = NULL;
        main_loop_wait(0);
        if (qemu_shutdown_requested())
            break;
        else if (qemu_powerdown_requested())
            qemu_system_powerdown();
        else if (qemu_reset_requested()) {
            pthread_kill(vcpu_info[0].thread, SIG_IPI);
            qemu_kvm_reset_requested = 1;
        }
        pthread_mutex_unlock(&qemu_mutex);
    }

    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}
static int kvm_debug(void *opaque, int vcpu)
{
    CPUState *env = cpu_single_env;

    env->exception_index = EXCP_DEBUG;
    return 1;
}
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    *data = cpu_inb(0, addr);
    return 0;
}

static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    *data = cpu_inw(0, addr);
    return 0;
}

static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    *data = cpu_inl(0, addr);
    return 0;
}
#define PM_IO_BASE 0xb000
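/*
 * Port 0xb2 is the APM control port.  The 0xf0/0xf1 commands clear/set
 * bit 0 (ACPI enable) of the PM control register at PM_IO_BASE + 4
 * (PIIX4-style layout assumed), standing in for the SMI handler a real
 * BIOS would install.
 */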
static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    if (addr == 0xb2) {
        switch (data) {
        case 0: {
            cpu_outb(0, 0xb3, 0);
            break;
        }
        case 0xf0: {
            unsigned x;

            /* disable acpi */
            x = cpu_inw(0, PM_IO_BASE + 4);
            x &= ~1;
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        case 0xf1: {
            unsigned x;

            /* enable acpi */
            x = cpu_inw(0, PM_IO_BASE + 4);
            x |= 1;
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        default:
            break;
        }
        return 0;
    }
    cpu_outb(0, addr, data);
    return 0;
}
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);
    return 0;
}

static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);
    return 0;
}
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 0);
    return 0;
}

static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 1);
    return 0;
}

static int kvm_io_window(void *opaque)
{
    return 1;
}
static int kvm_halt(void *opaque, int vcpu)
{
    return kvm_arch_halt(opaque, vcpu);
}

static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();
    return 1;
}
static struct kvm_callbacks qemu_kvm_ops = {
    .debug = kvm_debug,
    .inb   = kvm_inb,
    .inw   = kvm_inw,
    .inl   = kvm_inl,
    .outb  = kvm_outb,
    .outw  = kvm_outw,
    .outl  = kvm_outl,
    .mmio_read = kvm_mmio_read,
    .mmio_write = kvm_mmio_write,
    .halt  = kvm_halt,
    .shutdown = kvm_shutdown,
    .io_window = kvm_io_window,
    .try_push_interrupts = try_push_interrupts,
    .post_kvm_run = post_kvm_run,
    .pre_kvm_run = pre_kvm_run,
#ifdef TARGET_I386
    .tpr_access = handle_tpr_access,
#endif
#ifdef TARGET_PPC
    .powerpc_dcr_read = handle_powerpc_dcr_read,
    .powerpc_dcr_write = handle_powerpc_dcr_write,
#endif
};
int kvm_qemu_init()
{
    /* Try to initialize kvm */
    kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
    if (!kvm_context) {
        return -1;
    }
    pthread_mutex_lock(&qemu_mutex);

    return 0;
}
int kvm_qemu_create_context(void)
{
    int r;

    if (!kvm_irqchip) {
        kvm_disable_irqchip_creation(kvm_context);
    }
    if (!kvm_pit) {
        kvm_disable_pit_creation(kvm_context);
    }
    if (kvm_create(kvm_context, phys_ram_size, (void **)&phys_ram_base) < 0) {
        kvm_qemu_destroy();
        return -1;
    }
    r = kvm_arch_qemu_create_context();
    if (r < 0)
        kvm_qemu_destroy();
    return 0;
}
void kvm_qemu_destroy(void)
{
    kvm_finalize(kvm_context);
}
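/*
 * Register a qemu physical memory range with the kernel.  With
 * KVM_CAP_USER_MEMORY the range is mapped as user-allocated memory
 * (punching a hole first if it overlaps an existing slot); without it,
 * ROM contents are simply copied into place within phys_ram_base.
 */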
void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
                                      unsigned long size,
                                      unsigned long phys_offset)
{
#ifdef KVM_CAP_USER_MEMORY
    int r = 0;

    r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
    if (r) {
        if (!(phys_offset & ~TARGET_PAGE_MASK)) {
            r = kvm_is_allocated_mem(kvm_context, start_addr, size);
            if (r)
                return;
            r = kvm_is_intersecting_mem(kvm_context, start_addr);
            if (r)
                kvm_create_mem_hole(kvm_context, start_addr, size);
            r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
                                                phys_ram_base + phys_offset,
                                                size, 0);
        }
        if (phys_offset & IO_MEM_ROM) {
            phys_offset &= ~IO_MEM_ROM;
            r = kvm_is_intersecting_mem(kvm_context, start_addr);
            if (r)
                kvm_create_mem_hole(kvm_context, start_addr, size);
            r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
                                                phys_ram_base + phys_offset,
                                                size, 0);
        }
        if (r < 0) {
            printf("kvm_cpu_register_physical_memory: failed\n");
            exit(1);
        }
        return;
    }
#endif
    if (phys_offset & IO_MEM_ROM) {
        phys_offset &= ~IO_MEM_ROM;
        memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
    }
}
int kvm_qemu_check_extension(int ext)
{
    return kvm_check_extension(kvm_context, ext);
}

int kvm_qemu_init_env(CPUState *cenv)
{
    return kvm_arch_qemu_init_env(cenv);
}
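/* Translate qemu's breakpoint list and single-step flag into a
 * struct kvm_debug_guest for the kernel (at most 4 breakpoints). */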
int kvm_update_debugger(CPUState *env)
{
    struct kvm_debug_guest dbg;
    int i;

    dbg.enabled = 0;
    if (env->nb_breakpoints || env->singlestep_enabled) {
        dbg.enabled = 1;
        for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
            dbg.breakpoints[i].enabled = 1;
            dbg.breakpoints[i].address = env->breakpoints[i];
        }
        dbg.singlestep = env->singlestep_enabled;
    }
    return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
}
/*
 * dirty pages logging
 */
/* FIXME: use unsigned long pointer instead of unsigned char */
unsigned char *kvm_dirty_bitmap = NULL;

int kvm_physical_memory_set_dirty_tracking(int enable)
{
    int r = 0;

    if (!kvm_enabled())
        return 0;

    if (enable) {
        if (!kvm_dirty_bitmap) {
            unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
            kvm_dirty_bitmap = qemu_malloc(bitmap_size);
            if (kvm_dirty_bitmap == NULL) {
                perror("Failed to allocate dirty pages bitmap");
                r = -1;
            } else {
                r = kvm_dirty_pages_log_enable_all(kvm_context);
            }
        }
    } else {
        if (kvm_dirty_bitmap) {
            r = kvm_dirty_pages_log_reset(kvm_context);
            qemu_free(kvm_dirty_bitmap);
            kvm_dirty_bitmap = NULL;
        }
    }
    return r;
}
/* get kvm's dirty pages bitmap and update qemu's */
int kvm_get_dirty_pages_log_range(unsigned long start_addr,
                                  unsigned char *bitmap,
                                  unsigned int offset,
                                  unsigned long mem_size)
{
    unsigned int i, j, n = 0;
    unsigned char c;
    unsigned page_number, addr, addr1;
    unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + 7) / 8;

    /*
     * bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty.
     */
    for (i = 0; i < len; i++) {
        c = bitmap[i];
        while (c > 0) {
            j = ffsl(c) - 1;
            c &= ~(1u << j);
            page_number = i * 8 + j;
            addr1 = page_number * TARGET_PAGE_SIZE;
            addr = offset + addr1;
            cpu_physical_memory_set_dirty(addr);
            n++;
        }
    }
    return 0;
}
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}
/*
 * get kvm's dirty pages bitmap and update qemu's
 * we only care about physical ram, which resides in slots 0 and 3
 */
int kvm_update_dirty_pages_log(void)
{
    int r = 0;

    r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
                                  kvm_dirty_bitmap, NULL,
                                  kvm_get_dirty_bitmap_cb);
    return r;
}
int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
{
    unsigned int bsize = BITMAP_SIZE(phys_ram_size);
    unsigned int brsize = BITMAP_SIZE(ram_size);
    unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
    unsigned int extra_bytes = (extra_pages + 7) / 8;
    unsigned int hole_start = BITMAP_SIZE(0xa0000);
    unsigned int hole_end = BITMAP_SIZE(0xc0000);

    memset(bitmap, 0xFF, brsize + extra_bytes);
    memset(bitmap + hole_start, 0, hole_end - hole_start);
    memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);

    return 0;
}
#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq(int irq, int level)
{
    return kvm_set_irq_level(kvm_context, irq, level);
}

#endif
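/*
 * AIO completion waiting: the I/O thread polls its signal set directly,
 * while vcpu threads sleep on qemu_aio_cond, which kvm_process_signal()
 * signals when SIGUSR2 arrives.
 */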
void qemu_kvm_aio_wait_start(void)
{
}

void qemu_kvm_aio_wait(void)
{
    CPUState *cpu_single = cpu_single_env;

    if (!cpu_single_env) {
        pthread_mutex_unlock(&qemu_mutex);
        kvm_eat_signal(&io_signal_table, NULL, 1000);
        pthread_mutex_lock(&qemu_mutex);
        cpu_single_env = NULL;
    } else {
        pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
        cpu_single_env = cpu_single;
    }
}

void qemu_kvm_aio_wait_end(void)
{
}
int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
{
    return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
}

void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
                              unsigned long size, int log, int writable)
{
    return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
}

void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
                              unsigned long size)
{
    kvm_destroy_phys_mem(kvm_context, start_addr, size);
}