/*
 * qemu-kvm.c -- KVM vcpu-thread and I/O glue for QEMU userspace
 * (from the qemu-kvm/fedora.git tree)
 */
2 #include "config.h"
3 #include "config-host.h"
5 #ifdef USE_KVM
6 #define KVM_ALLOWED_DEFAULT 1
7 #else
8 #define KVM_ALLOWED_DEFAULT 0
9 #endif
11 int kvm_allowed = KVM_ALLOWED_DEFAULT;
12 int kvm_irqchip = 1;
14 #ifdef USE_KVM
16 #include <string.h>
17 #include "hw/hw.h"
18 #include "sysemu.h"
20 #include "qemu-kvm.h"
21 #include <libkvm.h>
22 #include <pthread.h>
23 #include <sys/utsname.h>
25 extern void perror(const char *s);
27 kvm_context_t kvm_context;
29 extern int smp_cpus;
31 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
32 pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
33 __thread struct vcpu_info *vcpu;
35 struct qemu_kvm_signal_table {
36 sigset_t sigset;
37 sigset_t negsigset;
40 static struct qemu_kvm_signal_table io_signal_table;
42 #define SIG_IPI (SIGRTMIN+4)
44 struct vcpu_info {
45 CPUState *env;
46 int sipi_needed;
47 int init;
48 pthread_t thread;
49 int signalled;
50 int stop;
51 int stopped;
52 } vcpu_info[4];
54 CPUState *qemu_kvm_cpu_env(int index)
56 return vcpu_info[index].env;
59 static void sig_ipi_handler(int n)
63 void kvm_update_interrupt_request(CPUState *env)
65 if (env && vcpu && env != vcpu->env) {
66 if (vcpu_info[env->cpu_index].signalled)
67 return;
68 vcpu_info[env->cpu_index].signalled = 1;
69 if (vcpu_info[env->cpu_index].thread)
70 pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);
74 void kvm_update_after_sipi(CPUState *env)
76 vcpu_info[env->cpu_index].sipi_needed = 1;
77 kvm_update_interrupt_request(env);
80 void kvm_apic_init(CPUState *env)
82 if (env->cpu_index != 0)
83 vcpu_info[env->cpu_index].init = 1;
84 kvm_update_interrupt_request(env);
#include <signal.h>

/* libkvm callback: delegate interrupt injection to the arch layer. */
static int try_push_interrupts(void *opaque)
{
    return kvm_arch_try_push_interrupts(opaque);
}
94 static void post_kvm_run(void *opaque, int vcpu)
97 pthread_mutex_lock(&qemu_mutex);
98 kvm_arch_post_kvm_run(opaque, vcpu);
101 static int pre_kvm_run(void *opaque, int vcpu)
103 CPUState *env = cpu_single_env;
105 kvm_arch_pre_kvm_run(opaque, vcpu);
107 if (env->interrupt_request & CPU_INTERRUPT_EXIT)
108 return 1;
109 pthread_mutex_unlock(&qemu_mutex);
110 return 0;
113 void kvm_load_registers(CPUState *env)
115 if (kvm_allowed)
116 kvm_arch_load_regs(env);
119 void kvm_save_registers(CPUState *env)
121 if (kvm_allowed)
122 kvm_arch_save_regs(env);
125 int kvm_cpu_exec(CPUState *env)
127 int r;
129 r = kvm_run(kvm_context, env->cpu_index);
130 if (r < 0) {
131 printf("kvm_run returned %d\n", r);
132 exit(1);
135 return 0;
138 extern int vm_running;
140 static int has_work(CPUState *env)
142 if (!vm_running)
143 return 0;
144 if (!(env->hflags & HF_HALTED_MASK))
145 return 1;
146 return kvm_arch_has_work(env);
149 static int kvm_eat_signal(CPUState *env, int timeout)
151 struct timespec ts;
152 int r, e, ret = 0;
153 siginfo_t siginfo;
154 struct sigaction sa;
156 ts.tv_sec = timeout / 1000;
157 ts.tv_nsec = (timeout % 1000) * 1000000;
158 r = sigtimedwait(&io_signal_table.sigset, &siginfo, &ts);
159 if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
160 return 0;
161 e = errno;
162 pthread_mutex_lock(&qemu_mutex);
163 if (vcpu)
164 cpu_single_env = vcpu->env;
165 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
166 printf("sigtimedwait: %s\n", strerror(e));
167 exit(1);
169 if (r != -1) {
170 sigaction(siginfo.si_signo, NULL, &sa);
171 sa.sa_handler(siginfo.si_signo);
172 if (siginfo.si_signo == SIGUSR2)
173 pthread_cond_signal(&qemu_aio_cond);
174 ret = 1;
176 pthread_mutex_unlock(&qemu_mutex);
178 return ret;
182 static void kvm_eat_signals(CPUState *env, int timeout)
184 int r = 0;
186 while (kvm_eat_signal(env, 0))
187 r = 1;
188 if (!r && timeout) {
189 r = kvm_eat_signal(env, timeout);
190 if (r)
191 while (kvm_eat_signal(env, 0))
195 * we call select() even if no signal was received, to account for
196 * for which there is no signal handler installed.
198 pthread_mutex_lock(&qemu_mutex);
199 cpu_single_env = vcpu->env;
200 main_loop_wait(0);
201 pthread_mutex_unlock(&qemu_mutex);
204 static void kvm_main_loop_wait(CPUState *env, int timeout)
206 pthread_mutex_unlock(&qemu_mutex);
207 if (env->cpu_index == 0)
208 kvm_eat_signals(env, timeout);
209 else {
210 if (!kvm_irqchip_in_kernel(kvm_context) &&
211 (timeout || vcpu_info[env->cpu_index].stopped)) {
212 sigset_t set;
213 int n;
215 paused:
216 sigemptyset(&set);
217 sigaddset(&set, SIG_IPI);
218 sigwait(&set, &n);
219 } else {
220 struct timespec ts;
221 siginfo_t siginfo;
222 sigset_t set;
224 ts.tv_sec = 0;
225 ts.tv_nsec = 0;
226 sigemptyset(&set);
227 sigaddset(&set, SIG_IPI);
228 sigtimedwait(&set, &siginfo, &ts);
230 if (vcpu_info[env->cpu_index].stop) {
231 vcpu_info[env->cpu_index].stop = 0;
232 vcpu_info[env->cpu_index].stopped = 1;
233 pthread_kill(vcpu_info[0].thread, SIG_IPI);
234 goto paused;
237 pthread_mutex_lock(&qemu_mutex);
238 cpu_single_env = env;
239 vcpu_info[env->cpu_index].signalled = 0;
242 static int all_threads_paused(void)
244 int i;
246 for (i = 1; i < smp_cpus; ++i)
247 if (vcpu_info[i].stopped)
248 return 0;
249 return 1;
252 static void pause_other_threads(void)
254 int i;
256 for (i = 1; i < smp_cpus; ++i) {
257 vcpu_info[i].stop = 1;
258 pthread_kill(vcpu_info[i].thread, SIG_IPI);
260 while (!all_threads_paused())
261 kvm_eat_signals(vcpu->env, 0);
264 static void resume_other_threads(void)
266 int i;
268 for (i = 1; i < smp_cpus; ++i) {
269 vcpu_info[i].stop = 0;
270 vcpu_info[i].stopped = 0;
271 pthread_kill(vcpu_info[i].thread, SIG_IPI);
/* VM state-change hook: pause AP vcpus when the VM stops, resume them
 * when it starts running again. */
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (running)
        resume_other_threads();
    else
        pause_other_threads();
}
283 static void update_regs_for_sipi(CPUState *env)
285 kvm_arch_update_regs_for_sipi(env);
286 vcpu_info[env->cpu_index].sipi_needed = 0;
287 vcpu_info[env->cpu_index].init = 0;
290 static void update_regs_for_init(CPUState *env)
292 cpu_reset(env);
293 kvm_arch_load_regs(env);
296 static void setup_kernel_sigmask(CPUState *env)
298 sigset_t set;
300 sigprocmask(SIG_BLOCK, NULL, &set);
301 sigdelset(&set, SIG_IPI);
302 if (env->cpu_index == 0)
303 sigandset(&set, &set, &io_signal_table.negsigset);
305 kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
308 static int kvm_main_loop_cpu(CPUState *env)
310 struct vcpu_info *info = &vcpu_info[env->cpu_index];
312 setup_kernel_sigmask(env);
313 pthread_mutex_lock(&qemu_mutex);
315 kvm_qemu_init_env(env);
316 env->ready_for_interrupt_injection = 1;
318 cpu_single_env = env;
319 #ifdef TARGET_I386
320 kvm_tpr_opt_setup(env);
321 #endif
322 while (1) {
323 while (!has_work(env))
324 kvm_main_loop_wait(env, 10);
325 if (env->interrupt_request & CPU_INTERRUPT_HARD)
326 env->hflags &= ~HF_HALTED_MASK;
327 if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
328 update_regs_for_sipi(env);
329 if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
330 update_regs_for_init(env);
331 if (!(env->hflags & HF_HALTED_MASK) && !info->init)
332 kvm_cpu_exec(env);
333 env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
334 kvm_main_loop_wait(env, 0);
335 if (qemu_shutdown_requested())
336 break;
337 else if (qemu_powerdown_requested())
338 qemu_system_powerdown();
339 else if (qemu_reset_requested()) {
340 env->interrupt_request = 0;
341 qemu_system_reset();
342 kvm_arch_load_regs(env);
345 pthread_mutex_unlock(&qemu_mutex);
346 return 0;
349 static void *ap_main_loop(void *_env)
351 CPUState *env = _env;
352 sigset_t signals;
354 vcpu = &vcpu_info[env->cpu_index];
355 vcpu->env = env;
356 sigfillset(&signals);
357 //sigdelset(&signals, SIG_IPI);
358 sigprocmask(SIG_BLOCK, &signals, NULL);
359 kvm_create_vcpu(kvm_context, env->cpu_index);
360 kvm_qemu_init_env(env);
361 if (kvm_irqchip_in_kernel(kvm_context))
362 env->hflags &= ~HF_HALTED_MASK;
363 kvm_main_loop_cpu(env);
364 return NULL;
367 static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
369 sigemptyset(&sigtab->sigset);
370 sigfillset(&sigtab->negsigset);
373 static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
375 sigaddset(&sigtab->sigset, signum);
376 sigdelset(&sigtab->negsigset, signum);
379 int kvm_init_ap(void)
381 CPUState *env = first_cpu->next_cpu;
382 int i;
384 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
385 qemu_kvm_init_signal_table(&io_signal_table);
386 kvm_add_signal(&io_signal_table, SIGIO);
387 kvm_add_signal(&io_signal_table, SIGALRM);
388 kvm_add_signal(&io_signal_table, SIGUSR2);
389 kvm_add_signal(&io_signal_table, SIG_IPI);
390 sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);
392 vcpu = &vcpu_info[0];
393 vcpu->env = first_cpu;
394 signal(SIG_IPI, sig_ipi_handler);
395 for (i = 1; i < smp_cpus; ++i) {
396 pthread_create(&vcpu_info[i].thread, NULL, ap_main_loop, env);
397 env = env->next_cpu;
399 return 0;
402 int kvm_main_loop(void)
404 vcpu_info[0].thread = pthread_self();
405 pthread_mutex_unlock(&qemu_mutex);
406 return kvm_main_loop_cpu(first_cpu);
409 static int kvm_debug(void *opaque, int vcpu)
411 CPUState *env = cpu_single_env;
413 env->exception_index = EXCP_DEBUG;
414 return 1;
/* Port-I/O byte read, forwarded to QEMU's emulated devices. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    *data = cpu_inb(0, addr);
    return 0;
}
/* Port-I/O word read, forwarded to QEMU's emulated devices. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    *data = cpu_inw(0, addr);
    return 0;
}
/* Port-I/O long read, forwarded to QEMU's emulated devices. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    *data = cpu_inl(0, addr);
    return 0;
}
#define PM_IO_BASE 0xb000

/* Port-I/O byte write.  Writes to port 0xb2 (the SMI command port) get
 * special handling that toggles bit 0 of the PM control register at
 * PM_IO_BASE+4 (presumably the ACPI enable bit -- original comments
 * said "enable acpi"); everything else is forwarded to QEMU. */
static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    if (addr == 0xb2) {
        switch (data) {
        case 0: {
            cpu_outb(0, 0xb3, 0);
            break;
        }
        case 0xf0: {
            unsigned x;

            /* clear bit 0 of the PM control register */
            x = cpu_inw(0, PM_IO_BASE + 4);
            x &= ~1;
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        case 0xf1: {
            unsigned x;

            /* set bit 0 of the PM control register (enable acpi) */
            x = cpu_inw(0, PM_IO_BASE + 4);
            x |= 1;
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        default:
            break;
        }
        return 0;
    }
    cpu_outb(0, addr, data);
    return 0;
}
/* Port-I/O word write, forwarded to QEMU's emulated devices. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);
    return 0;
}
/* Port-I/O long write, forwarded to QEMU's emulated devices. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);
    return 0;
}
/* MMIO read: satisfy it through QEMU's physical-memory layer. */
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 0);
    return 0;
}
/* MMIO write: satisfy it through QEMU's physical-memory layer. */
static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 1);
    return 0;
}
/* libkvm callback: always report that the I/O window may be serviced. */
static int kvm_io_window(void *opaque)
{
    return 1;
}
/* libkvm callback for a HLT exit: delegate to the arch layer. */
static int kvm_halt(void *opaque, int vcpu)
{
    return kvm_arch_halt(opaque, vcpu);
}
/* libkvm callback for a shutdown exit: request a system reset. */
static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();
    return 1;
}
513 static struct kvm_callbacks qemu_kvm_ops = {
514 .debug = kvm_debug,
515 .inb = kvm_inb,
516 .inw = kvm_inw,
517 .inl = kvm_inl,
518 .outb = kvm_outb,
519 .outw = kvm_outw,
520 .outl = kvm_outl,
521 .mmio_read = kvm_mmio_read,
522 .mmio_write = kvm_mmio_write,
523 .halt = kvm_halt,
524 .shutdown = kvm_shutdown,
525 .io_window = kvm_io_window,
526 .try_push_interrupts = try_push_interrupts,
527 .post_kvm_run = post_kvm_run,
528 .pre_kvm_run = pre_kvm_run,
529 #ifdef TARGET_I386
530 .tpr_access = handle_tpr_access,
531 #endif
532 #ifdef TARGET_PPC
533 .powerpc_dcr_read = handle_powerpc_dcr_read,
534 .powerpc_dcr_write = handle_powerpc_dcr_write,
535 #endif
538 int kvm_qemu_init()
540 /* Try to initialize kvm */
541 kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
542 if (!kvm_context) {
543 return -1;
545 pthread_mutex_lock(&qemu_mutex);
547 return 0;
550 int kvm_qemu_create_context(void)
552 int r;
553 if (!kvm_irqchip) {
554 kvm_disable_irqchip_creation(kvm_context);
556 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
557 kvm_qemu_destroy();
558 return -1;
560 r = kvm_arch_qemu_create_context();
561 if(r <0)
562 kvm_qemu_destroy();
563 return 0;
566 void kvm_qemu_destroy(void)
568 kvm_finalize(kvm_context);
571 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
572 unsigned long size,
573 unsigned long phys_offset)
575 #ifdef KVM_CAP_USER_MEMORY
576 int r = 0;
578 r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
579 if (r) {
580 if (!(phys_offset & ~TARGET_PAGE_MASK)) {
581 r = kvm_is_allocated_mem(kvm_context, start_addr, size);
582 if (r)
583 return;
584 r = kvm_is_intersecting_mem(kvm_context, start_addr);
585 if (r)
586 kvm_create_mem_hole(kvm_context, start_addr, size);
587 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
588 phys_ram_base + phys_offset,
589 size, 0);
591 if (phys_offset & IO_MEM_ROM) {
592 phys_offset &= ~IO_MEM_ROM;
593 r = kvm_is_intersecting_mem(kvm_context, start_addr);
594 if (r)
595 kvm_create_mem_hole(kvm_context, start_addr, size);
596 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
597 phys_ram_base + phys_offset,
598 size, 0);
600 if (r < 0) {
601 printf("kvm_cpu_register_physical_memory: failed\n");
602 exit(1);
604 return;
606 #endif
607 if (phys_offset & IO_MEM_ROM) {
608 phys_offset &= ~IO_MEM_ROM;
609 memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
613 int kvm_qemu_check_extension(int ext)
615 return kvm_check_extension(kvm_context, ext);
618 int kvm_qemu_init_env(CPUState *cenv)
620 return kvm_arch_qemu_init_env(cenv);
623 int kvm_update_debugger(CPUState *env)
625 struct kvm_debug_guest dbg;
626 int i;
628 dbg.enabled = 0;
629 if (env->nb_breakpoints || env->singlestep_enabled) {
630 dbg.enabled = 1;
631 for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
632 dbg.breakpoints[i].enabled = 1;
633 dbg.breakpoints[i].address = env->breakpoints[i];
635 dbg.singlestep = env->singlestep_enabled;
637 return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
642 * dirty pages logging
644 /* FIXME: use unsigned long pointer instead of unsigned char */
645 unsigned char *kvm_dirty_bitmap = NULL;
646 int kvm_physical_memory_set_dirty_tracking(int enable)
648 int r = 0;
650 if (!kvm_allowed)
651 return 0;
653 if (enable) {
654 if (!kvm_dirty_bitmap) {
655 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
656 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
657 if (kvm_dirty_bitmap == NULL) {
658 perror("Failed to allocate dirty pages bitmap");
659 r=-1;
661 else {
662 r = kvm_dirty_pages_log_enable_all(kvm_context);
666 else {
667 if (kvm_dirty_bitmap) {
668 r = kvm_dirty_pages_log_reset(kvm_context);
669 qemu_free(kvm_dirty_bitmap);
670 kvm_dirty_bitmap = NULL;
673 return r;
676 /* get kvm's dirty pages bitmap and update qemu's */
677 int kvm_get_dirty_pages_log_range(unsigned long start_addr,
678 unsigned char *bitmap,
679 unsigned int offset,
680 unsigned long mem_size)
682 unsigned int i, j, n=0;
683 unsigned char c;
684 unsigned page_number, addr, addr1;
685 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
688 * bitmap-traveling is faster than memory-traveling (for addr...)
689 * especially when most of the memory is not dirty.
691 for (i=0; i<len; i++) {
692 c = bitmap[i];
693 while (c>0) {
694 j = ffsl(c) - 1;
695 c &= ~(1u<<j);
696 page_number = i * 8 + j;
697 addr1 = page_number * TARGET_PAGE_SIZE;
698 addr = offset + addr1;
699 cpu_physical_memory_set_dirty(addr);
700 n++;
703 return 0;
/* libkvm callback adapter for kvm_get_dirty_pages_range(). */
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}
712 * get kvm's dirty pages bitmap and update qemu's
713 * we only care about physical ram, which resides in slots 0 and 3
715 int kvm_update_dirty_pages_log(void)
717 int r = 0;
720 r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
721 kvm_dirty_bitmap, NULL,
722 kvm_get_dirty_bitmap_cb);
723 return r;
726 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
728 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
729 unsigned int brsize = BITMAP_SIZE(ram_size);
730 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
731 unsigned int extra_bytes = (extra_pages +7)/8;
732 unsigned int hole_start = BITMAP_SIZE(0xa0000);
733 unsigned int hole_end = BITMAP_SIZE(0xc0000);
735 memset(bitmap, 0xFF, brsize + extra_bytes);
736 memset(bitmap + hole_start, 0, hole_end - hole_start);
737 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
739 return 0;
#ifdef KVM_CAP_IRQCHIP

/* Drive an interrupt line on the in-kernel irqchip. */
int kvm_set_irq(int irq, int level)
{
    return kvm_set_irq_level(kvm_context, irq, level);
}

#endif
751 void qemu_kvm_aio_wait_start(void)
755 void qemu_kvm_aio_wait(void)
757 if (!cpu_single_env || cpu_single_env->cpu_index == 0) {
758 pthread_mutex_unlock(&qemu_mutex);
759 kvm_eat_signal(cpu_single_env, 1000);
760 pthread_mutex_lock(&qemu_mutex);
761 } else {
762 pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
766 void qemu_kvm_aio_wait_end(void)
770 #endif