/*
 * qemu-kvm.c — QEMU/KVM integration glue (from qemu-kvm/amd-iommu.git,
 * blob 3aeba39001d5075a441f90ed9429320d3651ba9f).
 * Gitweb header of the captured revision: "kvm: configure: Update the
 * configure script to make it work for ia64".
 */
2 #include "config.h"
3 #include "config-host.h"
5 #ifdef USE_KVM
6 #define KVM_ALLOWED_DEFAULT 1
7 #else
8 #define KVM_ALLOWED_DEFAULT 0
9 #endif
11 int kvm_allowed = KVM_ALLOWED_DEFAULT;
12 int kvm_irqchip = 1;
14 #ifdef USE_KVM
16 #include <string.h>
17 #include "vl.h"
19 #include "qemu-kvm.h"
20 #include <libkvm.h>
21 #include <pthread.h>
22 #include <sys/utsname.h>
24 extern void perror(const char *s);
26 kvm_context_t kvm_context;
28 extern int smp_cpus;
30 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
31 __thread CPUState *vcpu_env;
33 static sigset_t io_sigset, io_negsigset;
35 static int wait_hack;
37 #define SIG_IPI (SIGRTMIN+4)
39 struct vcpu_info {
40 int sipi_needed;
41 int init;
42 pthread_t thread;
43 int signalled;
44 int stop;
45 int stopped;
46 } vcpu_info[4];
/* SIG_IPI handler: intentionally empty — the signal exists only to kick a
 * vcpu out of KVM_RUN / sigwait.  (Body lost in the scraped source;
 * reconstructed as the conventional no-op handler — verify upstream.) */
static void sig_ipi_handler(int n)
{
}
52 void kvm_update_interrupt_request(CPUState *env)
54 if (env && env != vcpu_env) {
55 if (vcpu_info[env->cpu_index].signalled)
56 return;
57 vcpu_info[env->cpu_index].signalled = 1;
58 if (vcpu_info[env->cpu_index].thread)
59 pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);
63 void kvm_update_after_sipi(CPUState *env)
65 vcpu_info[env->cpu_index].sipi_needed = 1;
66 kvm_update_interrupt_request(env);
69 * the qemu bios waits using a busy loop that's much too short for
70 * kvm. add a wait after the first sipi.
73 static int first_sipi = 1;
75 if (first_sipi) {
76 wait_hack = 1;
77 first_sipi = 0;
82 void kvm_apic_init(CPUState *env)
84 if (env->cpu_index != 0)
85 vcpu_info[env->cpu_index].init = 1;
86 kvm_update_interrupt_request(env);
#include <signal.h>

/* libkvm callback: delegate interrupt injection to the arch backend. */
static int try_push_interrupts(void *opaque)
{
    return kvm_arch_try_push_interrupts(opaque);
}
96 static void post_kvm_run(void *opaque, int vcpu)
99 pthread_mutex_lock(&qemu_mutex);
100 kvm_arch_post_kvm_run(opaque, vcpu);
103 static int pre_kvm_run(void *opaque, int vcpu)
105 CPUState *env = cpu_single_env;
107 if (env->cpu_index == 0 && wait_hack) {
108 int i;
110 wait_hack = 0;
112 pthread_mutex_unlock(&qemu_mutex);
113 for (i = 0; i < 10; ++i)
114 usleep(1000);
115 pthread_mutex_lock(&qemu_mutex);
118 kvm_arch_pre_kvm_run(opaque, vcpu);
120 if (env->interrupt_request & CPU_INTERRUPT_EXIT)
121 return 1;
122 pthread_mutex_unlock(&qemu_mutex);
123 return 0;
126 void kvm_load_registers(CPUState *env)
128 if (kvm_allowed)
129 kvm_arch_load_regs(env);
132 void kvm_save_registers(CPUState *env)
134 if (kvm_allowed)
135 kvm_arch_save_regs(env);
138 int kvm_cpu_exec(CPUState *env)
140 int r;
142 r = kvm_run(kvm_context, env->cpu_index);
143 if (r < 0) {
144 printf("kvm_run returned %d\n", r);
145 exit(1);
148 return 0;
151 extern int vm_running;
153 static int has_work(CPUState *env)
155 if (!vm_running)
156 return 0;
157 if (!(env->hflags & HF_HALTED_MASK))
158 return 1;
159 return kvm_arch_has_work(env);
162 static int kvm_eat_signal(CPUState *env, int timeout)
164 struct timespec ts;
165 int r, e, ret = 0;
166 siginfo_t siginfo;
167 struct sigaction sa;
169 ts.tv_sec = timeout / 1000;
170 ts.tv_nsec = (timeout % 1000) * 1000000;
171 r = sigtimedwait(&io_sigset, &siginfo, &ts);
172 if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
173 return 0;
174 e = errno;
175 pthread_mutex_lock(&qemu_mutex);
176 cpu_single_env = vcpu_env;
177 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
178 printf("sigtimedwait: %s\n", strerror(e));
179 exit(1);
181 if (r != -1) {
182 sigaction(siginfo.si_signo, NULL, &sa);
183 sa.sa_handler(siginfo.si_signo);
184 ret = 1;
186 pthread_mutex_unlock(&qemu_mutex);
188 return ret;
192 static void kvm_eat_signals(CPUState *env, int timeout)
194 int r = 0;
196 while (kvm_eat_signal(env, 0))
197 r = 1;
198 if (!r && timeout) {
199 r = kvm_eat_signal(env, timeout);
200 if (r)
201 while (kvm_eat_signal(env, 0))
205 * we call select() even if no signal was received, to account for
206 * for which there is no signal handler installed.
208 pthread_mutex_lock(&qemu_mutex);
209 cpu_single_env = vcpu_env;
210 main_loop_wait(0);
211 pthread_mutex_unlock(&qemu_mutex);
214 static void kvm_main_loop_wait(CPUState *env, int timeout)
216 pthread_mutex_unlock(&qemu_mutex);
217 if (env->cpu_index == 0)
218 kvm_eat_signals(env, timeout);
219 else {
220 if (!kvm_irqchip_in_kernel(kvm_context) &&
221 (timeout || vcpu_info[env->cpu_index].stopped)) {
222 sigset_t set;
223 int n;
225 paused:
226 sigemptyset(&set);
227 sigaddset(&set, SIG_IPI);
228 sigwait(&set, &n);
229 } else {
230 struct timespec ts;
231 siginfo_t siginfo;
232 sigset_t set;
234 ts.tv_sec = 0;
235 ts.tv_nsec = 0;
236 sigemptyset(&set);
237 sigaddset(&set, SIG_IPI);
238 sigtimedwait(&set, &siginfo, &ts);
240 if (vcpu_info[env->cpu_index].stop) {
241 vcpu_info[env->cpu_index].stop = 0;
242 vcpu_info[env->cpu_index].stopped = 1;
243 pthread_kill(vcpu_info[0].thread, SIG_IPI);
244 goto paused;
247 pthread_mutex_lock(&qemu_mutex);
248 cpu_single_env = env;
249 vcpu_info[env->cpu_index].signalled = 0;
252 static int all_threads_paused(void)
254 int i;
256 for (i = 1; i < smp_cpus; ++i)
257 if (vcpu_info[i].stopped)
258 return 0;
259 return 1;
262 static void pause_other_threads(void)
264 int i;
266 for (i = 1; i < smp_cpus; ++i) {
267 vcpu_info[i].stop = 1;
268 pthread_kill(vcpu_info[i].thread, SIG_IPI);
270 while (!all_threads_paused())
271 kvm_eat_signals(vcpu_env, 0);
274 static void resume_other_threads(void)
276 int i;
278 for (i = 1; i < smp_cpus; ++i) {
279 vcpu_info[i].stop = 0;
280 vcpu_info[i].stopped = 0;
281 pthread_kill(vcpu_info[i].thread, SIG_IPI);
/* VM state-change hook: pause or resume the AP vcpu threads in step with
 * the machine's running state. */
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (running)
        resume_other_threads();
    else
        pause_other_threads();
}
293 static void update_regs_for_sipi(CPUState *env)
295 kvm_arch_update_regs_for_sipi(env);
296 vcpu_info[env->cpu_index].sipi_needed = 0;
297 vcpu_info[env->cpu_index].init = 0;
300 static void update_regs_for_init(CPUState *env)
302 cpu_reset(env);
303 kvm_arch_load_regs(env);
306 static void setup_kernel_sigmask(CPUState *env)
308 sigset_t set;
310 sigprocmask(SIG_BLOCK, NULL, &set);
311 sigdelset(&set, SIG_IPI);
312 if (env->cpu_index == 0)
313 sigandset(&set, &set, &io_negsigset);
315 kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
318 static int kvm_main_loop_cpu(CPUState *env)
320 struct vcpu_info *info = &vcpu_info[env->cpu_index];
322 setup_kernel_sigmask(env);
323 pthread_mutex_lock(&qemu_mutex);
324 cpu_single_env = env;
325 while (1) {
326 while (!has_work(env))
327 kvm_main_loop_wait(env, 10);
328 if (env->interrupt_request & CPU_INTERRUPT_HARD)
329 env->hflags &= ~HF_HALTED_MASK;
330 if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
331 update_regs_for_sipi(env);
332 if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
333 update_regs_for_init(env);
334 if (!(env->hflags & HF_HALTED_MASK) && !info->init)
335 kvm_cpu_exec(env);
336 env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
337 kvm_main_loop_wait(env, 0);
338 if (qemu_shutdown_requested())
339 break;
340 else if (qemu_powerdown_requested())
341 qemu_system_powerdown();
342 else if (qemu_reset_requested()) {
343 env->interrupt_request = 0;
344 qemu_system_reset();
345 kvm_arch_load_regs(env);
348 pthread_mutex_unlock(&qemu_mutex);
349 return 0;
352 static void *ap_main_loop(void *_env)
354 CPUState *env = _env;
355 sigset_t signals;
357 vcpu_env = env;
358 sigfillset(&signals);
359 //sigdelset(&signals, SIG_IPI);
360 sigprocmask(SIG_BLOCK, &signals, NULL);
361 kvm_create_vcpu(kvm_context, env->cpu_index);
362 kvm_qemu_init_env(env);
363 if (kvm_irqchip_in_kernel(kvm_context))
364 env->hflags &= ~HF_HALTED_MASK;
365 kvm_main_loop_cpu(env);
366 return NULL;
369 static void kvm_add_signal(int signum)
371 sigaddset(&io_sigset, signum);
372 sigdelset(&io_negsigset, signum);
373 sigprocmask(SIG_BLOCK, &io_sigset, NULL);
376 int kvm_init_ap(void)
378 CPUState *env = first_cpu->next_cpu;
379 int i;
381 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
382 sigemptyset(&io_sigset);
383 sigfillset(&io_negsigset);
384 kvm_add_signal(SIGIO);
385 kvm_add_signal(SIGALRM);
386 kvm_add_signal(SIGUSR2);
387 if (!kvm_irqchip_in_kernel(kvm_context))
388 kvm_add_signal(SIG_IPI);
390 vcpu_env = first_cpu;
391 signal(SIG_IPI, sig_ipi_handler);
392 for (i = 1; i < smp_cpus; ++i) {
393 pthread_create(&vcpu_info[i].thread, NULL, ap_main_loop, env);
394 env = env->next_cpu;
396 return 0;
399 int kvm_main_loop(void)
401 vcpu_info[0].thread = pthread_self();
402 return kvm_main_loop_cpu(first_cpu);
405 static int kvm_debug(void *opaque, int vcpu)
407 CPUState *env = cpu_single_env;
409 env->exception_index = EXCP_DEBUG;
410 return 1;
413 static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
415 *data = cpu_inb(0, addr);
416 return 0;
419 static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
421 *data = cpu_inw(0, addr);
422 return 0;
425 static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
427 *data = cpu_inl(0, addr);
428 return 0;
431 #define PM_IO_BASE 0xb000
433 static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
435 if (addr == 0xb2) {
436 switch (data) {
437 case 0: {
438 cpu_outb(0, 0xb3, 0);
439 break;
441 case 0xf0: {
442 unsigned x;
444 /* enable acpi */
445 x = cpu_inw(0, PM_IO_BASE + 4);
446 x &= ~1;
447 cpu_outw(0, PM_IO_BASE + 4, x);
448 break;
450 case 0xf1: {
451 unsigned x;
453 /* enable acpi */
454 x = cpu_inw(0, PM_IO_BASE + 4);
455 x |= 1;
456 cpu_outw(0, PM_IO_BASE + 4, x);
457 break;
459 default:
460 break;
462 return 0;
464 cpu_outb(0, addr, data);
465 return 0;
468 static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
470 cpu_outw(0, addr, data);
471 return 0;
474 static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
476 cpu_outl(0, addr, data);
477 return 0;
480 static int kvm_readb(void *opaque, uint64_t addr, uint8_t *data)
482 *data = ldub_phys(addr);
483 return 0;
486 static int kvm_readw(void *opaque, uint64_t addr, uint16_t *data)
488 *data = lduw_phys(addr);
489 return 0;
492 static int kvm_readl(void *opaque, uint64_t addr, uint32_t *data)
494 /* hack: Red Hat 7.1 generates some wierd accesses. */
495 if (addr > 0xa0000 - 4 && addr < 0xa0000) {
496 *data = 0;
497 return 0;
500 *data = ldl_phys(addr);
501 return 0;
504 static int kvm_readq(void *opaque, uint64_t addr, uint64_t *data)
506 *data = ldq_phys(addr);
507 return 0;
510 static int kvm_writeb(void *opaque, uint64_t addr, uint8_t data)
512 stb_phys(addr, data);
513 return 0;
516 static int kvm_writew(void *opaque, uint64_t addr, uint16_t data)
518 stw_phys(addr, data);
519 return 0;
522 static int kvm_writel(void *opaque, uint64_t addr, uint32_t data)
524 stl_phys(addr, data);
525 return 0;
528 static int kvm_writeq(void *opaque, uint64_t addr, uint64_t data)
530 stq_phys(addr, data);
531 return 0;
/* libkvm callback when the io window opens: always request an exit to
 * userspace (return 1). */
static int kvm_io_window(void *opaque)
{
    return 1;
}
/* libkvm callback for a guest HLT: delegate to the arch backend. */
static int kvm_halt(void *opaque, int vcpu)
{
    return kvm_arch_halt(opaque, vcpu);
}
/* libkvm callback on a guest shutdown (triple fault etc.): request a
 * system reset and exit the run loop. */
static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();
    return 1;
}
551 static struct kvm_callbacks qemu_kvm_ops = {
552 .debug = kvm_debug,
553 .inb = kvm_inb,
554 .inw = kvm_inw,
555 .inl = kvm_inl,
556 .outb = kvm_outb,
557 .outw = kvm_outw,
558 .outl = kvm_outl,
559 .readb = kvm_readb,
560 .readw = kvm_readw,
561 .readl = kvm_readl,
562 .readq = kvm_readq,
563 .writeb = kvm_writeb,
564 .writew = kvm_writew,
565 .writel = kvm_writel,
566 .writeq = kvm_writeq,
567 .halt = kvm_halt,
568 .shutdown = kvm_shutdown,
569 .io_window = kvm_io_window,
570 .try_push_interrupts = try_push_interrupts,
571 .post_kvm_run = post_kvm_run,
572 .pre_kvm_run = pre_kvm_run,
575 int kvm_qemu_init()
577 /* Try to initialize kvm */
578 kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
579 if (!kvm_context) {
580 return -1;
583 return 0;
586 int kvm_qemu_create_context(void)
588 int r;
589 if (!kvm_irqchip) {
590 kvm_disable_irqchip_creation(kvm_context);
592 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
593 kvm_qemu_destroy();
594 return -1;
596 r = kvm_arch_qemu_create_context();
597 if(r <0)
598 kvm_qemu_destroy();
599 return 0;
602 void kvm_qemu_destroy(void)
604 kvm_finalize(kvm_context);
607 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
608 unsigned long size,
609 unsigned long phys_offset)
611 #ifdef KVM_CAP_USER_MEMORY
612 int r = 0;
614 r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
615 if (r) {
616 if (!(phys_offset & ~TARGET_PAGE_MASK)) {
617 r = kvm_is_allocated_mem(kvm_context, start_addr, size);
618 if (r)
619 return;
620 r = kvm_is_intersecting_mem(kvm_context, start_addr);
621 if (r)
622 kvm_create_mem_hole(kvm_context, start_addr, size);
623 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
624 phys_ram_base + phys_offset,
625 size, 0);
627 if (phys_offset & IO_MEM_ROM) {
628 phys_offset &= ~IO_MEM_ROM;
629 r = kvm_is_intersecting_mem(kvm_context, start_addr);
630 if (r)
631 kvm_create_mem_hole(kvm_context, start_addr, size);
632 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
633 phys_ram_base + phys_offset,
634 size, 0);
636 if (r < 0) {
637 printf("kvm_cpu_register_physical_memory: failed\n");
638 exit(1);
640 return;
642 #endif
643 if (phys_offset & IO_MEM_ROM) {
644 phys_offset &= ~IO_MEM_ROM;
645 memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
649 int kvm_qemu_check_extension(int ext)
651 return kvm_check_extension(kvm_context, ext);
654 int kvm_qemu_init_env(CPUState *cenv)
656 return kvm_arch_qemu_init_env(cenv);
659 int kvm_update_debugger(CPUState *env)
661 struct kvm_debug_guest dbg;
662 int i;
664 dbg.enabled = 0;
665 if (env->nb_breakpoints || env->singlestep_enabled) {
666 dbg.enabled = 1;
667 for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
668 dbg.breakpoints[i].enabled = 1;
669 dbg.breakpoints[i].address = env->breakpoints[i];
671 dbg.singlestep = env->singlestep_enabled;
673 return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
678 * dirty pages logging
680 /* FIXME: use unsigned long pointer instead of unsigned char */
681 unsigned char *kvm_dirty_bitmap = NULL;
682 int kvm_physical_memory_set_dirty_tracking(int enable)
684 int r = 0;
686 if (!kvm_allowed)
687 return 0;
689 if (enable) {
690 if (!kvm_dirty_bitmap) {
691 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
692 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
693 if (kvm_dirty_bitmap == NULL) {
694 perror("Failed to allocate dirty pages bitmap");
695 r=-1;
697 else {
698 r = kvm_dirty_pages_log_enable_all(kvm_context);
702 else {
703 if (kvm_dirty_bitmap) {
704 r = kvm_dirty_pages_log_reset(kvm_context);
705 qemu_free(kvm_dirty_bitmap);
706 kvm_dirty_bitmap = NULL;
709 return r;
712 /* get kvm's dirty pages bitmap and update qemu's */
713 int kvm_get_dirty_pages_log_slot(unsigned long start_addr,
714 unsigned char *bitmap,
715 unsigned int offset,
716 unsigned int len)
718 int r;
719 unsigned int i, j, n=0;
720 unsigned char c;
721 unsigned page_number, addr, addr1;
723 memset(bitmap, 0, len);
724 r = kvm_get_dirty_pages(kvm_context, start_addr, bitmap);
725 if (r)
726 return r;
729 * bitmap-traveling is faster than memory-traveling (for addr...)
730 * especially when most of the memory is not dirty.
732 for (i=0; i<len; i++) {
733 c = bitmap[i];
734 while (c>0) {
735 j = ffsl(c) - 1;
736 c &= ~(1u<<j);
737 page_number = i * 8 + j;
738 addr1 = page_number * TARGET_PAGE_SIZE;
739 addr = offset + addr1;
740 cpu_physical_memory_set_dirty(addr);
741 n++;
744 return 0;
748 * get kvm's dirty pages bitmap and update qemu's
749 * we only care about physical ram, which resides in slots 0 and 3
751 int kvm_update_dirty_pages_log(void)
753 int r = 0, len;
755 len = BITMAP_SIZE(0xa0000);
756 r = kvm_get_dirty_pages_log_slot(0, kvm_dirty_bitmap, 0 , len);
757 len = BITMAP_SIZE(phys_ram_size - 0xc0000);
758 r = r || kvm_get_dirty_pages_log_slot(0xc0000, kvm_dirty_bitmap, 0xc0000, len);
759 return r;
762 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
764 int r=0, len, offset;
766 len = BITMAP_SIZE(phys_ram_size);
767 memset(bitmap, 0, len);
769 r = kvm_get_mem_map(kvm_context, 0, bitmap);
770 if (r)
771 goto out;
773 offset = BITMAP_SIZE(0xc0000);
774 r = kvm_get_mem_map(kvm_context, 0xc0000, bitmap + offset);
776 out:
777 return r;
780 #ifdef KVM_CAP_IRQCHIP
782 int kvm_set_irq(int irq, int level)
784 return kvm_set_irq_level(kvm_context, irq, level);
787 #endif
789 #endif