kvm: libkvm: get_slot(): fix is-address-in-range check
[qemu-kvm/fedora.git] / qemu-kvm.c
blob 8c48b291303d9dddbd58f4ddf72de127297a3e2e
2 #include "config.h"
3 #include "config-host.h"
5 #ifdef USE_KVM
6 #define KVM_ALLOWED_DEFAULT 1
7 #else
8 #define KVM_ALLOWED_DEFAULT 0
9 #endif
11 int kvm_allowed = KVM_ALLOWED_DEFAULT;
12 int kvm_irqchip = 1;
14 #ifdef USE_KVM
16 #include <string.h>
17 #include "hw/hw.h"
18 #include "sysemu.h"
20 #include "qemu-kvm.h"
21 #include <libkvm.h>
22 #include <pthread.h>
23 #include <sys/utsname.h>
25 extern void perror(const char *s);
27 kvm_context_t kvm_context;
29 extern int smp_cpus;
31 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
32 __thread CPUState *vcpu_env;
34 static sigset_t io_sigset, io_negsigset;
36 static int wait_hack;
38 #define SIG_IPI (SIGRTMIN+4)
/*
 * Per-vcpu bookkeeping shared between the io thread (vcpu 0) and the
 * AP vcpu threads.
 * NOTE(review): statically sized for 4 vcpus while the loops elsewhere
 * run up to smp_cpus -- confirm smp_cpus can never exceed 4.
 */
struct vcpu_info {
    int sipi_needed;    /* a SIPI is pending; reload regs before entry */
    int init;           /* INIT received; vcpu held in wait-for-SIPI */
    pthread_t thread;   /* vcpu thread handle (0 until created) */
    int signalled;      /* SIG_IPI already sent and not yet consumed */
    int stop;           /* request from vcpu 0 to pause this vcpu */
    int stopped;        /* vcpu acknowledged the pause request */
} vcpu_info[4];
/*
 * SIG_IPI handler.  The signal exists only to kick a vcpu thread out of
 * KVM_RUN or out of sigtimedwait(); no work happens in the handler.
 */
static void sig_ipi_handler(int n)
{
}
53 void kvm_update_interrupt_request(CPUState *env)
55 if (env && env != vcpu_env) {
56 if (vcpu_info[env->cpu_index].signalled)
57 return;
58 vcpu_info[env->cpu_index].signalled = 1;
59 if (vcpu_info[env->cpu_index].thread)
60 pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);
64 void kvm_update_after_sipi(CPUState *env)
66 vcpu_info[env->cpu_index].sipi_needed = 1;
67 kvm_update_interrupt_request(env);
70 * the qemu bios waits using a busy loop that's much too short for
71 * kvm. add a wait after the first sipi.
74 static int first_sipi = 1;
76 if (first_sipi) {
77 wait_hack = 1;
78 first_sipi = 0;
83 void kvm_apic_init(CPUState *env)
85 if (env->cpu_index != 0)
86 vcpu_info[env->cpu_index].init = 1;
87 kvm_update_interrupt_request(env);
90 #include <signal.h>
/* libkvm callback: let arch code inject a pending interrupt, if any. */
static int try_push_interrupts(void *opaque)
{
    return kvm_arch_try_push_interrupts(opaque);
}
97 static void post_kvm_run(void *opaque, int vcpu)
100 pthread_mutex_lock(&qemu_mutex);
101 kvm_arch_post_kvm_run(opaque, vcpu);
104 static int pre_kvm_run(void *opaque, int vcpu)
106 CPUState *env = cpu_single_env;
108 if (env->cpu_index == 0 && wait_hack) {
109 int i;
111 wait_hack = 0;
113 pthread_mutex_unlock(&qemu_mutex);
114 for (i = 0; i < 10; ++i)
115 usleep(1000);
116 pthread_mutex_lock(&qemu_mutex);
119 kvm_arch_pre_kvm_run(opaque, vcpu);
121 if (env->interrupt_request & CPU_INTERRUPT_EXIT)
122 return 1;
123 pthread_mutex_unlock(&qemu_mutex);
124 return 0;
127 void kvm_load_registers(CPUState *env)
129 if (kvm_allowed)
130 kvm_arch_load_regs(env);
133 void kvm_save_registers(CPUState *env)
135 if (kvm_allowed)
136 kvm_arch_save_regs(env);
139 int kvm_cpu_exec(CPUState *env)
141 int r;
143 r = kvm_run(kvm_context, env->cpu_index);
144 if (r < 0) {
145 printf("kvm_run returned %d\n", r);
146 exit(1);
149 return 0;
152 extern int vm_running;
154 static int has_work(CPUState *env)
156 if (!vm_running)
157 return 0;
158 if (!(env->hflags & HF_HALTED_MASK))
159 return 1;
160 return kvm_arch_has_work(env);
163 static int kvm_eat_signal(CPUState *env, int timeout)
165 struct timespec ts;
166 int r, e, ret = 0;
167 siginfo_t siginfo;
168 struct sigaction sa;
170 ts.tv_sec = timeout / 1000;
171 ts.tv_nsec = (timeout % 1000) * 1000000;
172 r = sigtimedwait(&io_sigset, &siginfo, &ts);
173 if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
174 return 0;
175 e = errno;
176 pthread_mutex_lock(&qemu_mutex);
177 cpu_single_env = vcpu_env;
178 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
179 printf("sigtimedwait: %s\n", strerror(e));
180 exit(1);
182 if (r != -1) {
183 sigaction(siginfo.si_signo, NULL, &sa);
184 sa.sa_handler(siginfo.si_signo);
185 ret = 1;
187 pthread_mutex_unlock(&qemu_mutex);
189 return ret;
193 static void kvm_eat_signals(CPUState *env, int timeout)
195 int r = 0;
197 while (kvm_eat_signal(env, 0))
198 r = 1;
199 if (!r && timeout) {
200 r = kvm_eat_signal(env, timeout);
201 if (r)
202 while (kvm_eat_signal(env, 0))
206 * we call select() even if no signal was received, to account for
207 * for which there is no signal handler installed.
209 pthread_mutex_lock(&qemu_mutex);
210 cpu_single_env = vcpu_env;
211 main_loop_wait(0);
212 pthread_mutex_unlock(&qemu_mutex);
215 static void kvm_main_loop_wait(CPUState *env, int timeout)
217 pthread_mutex_unlock(&qemu_mutex);
218 if (env->cpu_index == 0)
219 kvm_eat_signals(env, timeout);
220 else {
221 if (!kvm_irqchip_in_kernel(kvm_context) &&
222 (timeout || vcpu_info[env->cpu_index].stopped)) {
223 sigset_t set;
224 int n;
226 paused:
227 sigemptyset(&set);
228 sigaddset(&set, SIG_IPI);
229 sigwait(&set, &n);
230 } else {
231 struct timespec ts;
232 siginfo_t siginfo;
233 sigset_t set;
235 ts.tv_sec = 0;
236 ts.tv_nsec = 0;
237 sigemptyset(&set);
238 sigaddset(&set, SIG_IPI);
239 sigtimedwait(&set, &siginfo, &ts);
241 if (vcpu_info[env->cpu_index].stop) {
242 vcpu_info[env->cpu_index].stop = 0;
243 vcpu_info[env->cpu_index].stopped = 1;
244 pthread_kill(vcpu_info[0].thread, SIG_IPI);
245 goto paused;
248 pthread_mutex_lock(&qemu_mutex);
249 cpu_single_env = env;
250 vcpu_info[env->cpu_index].signalled = 0;
253 static int all_threads_paused(void)
255 int i;
257 for (i = 1; i < smp_cpus; ++i)
258 if (vcpu_info[i].stopped)
259 return 0;
260 return 1;
263 static void pause_other_threads(void)
265 int i;
267 for (i = 1; i < smp_cpus; ++i) {
268 vcpu_info[i].stop = 1;
269 pthread_kill(vcpu_info[i].thread, SIG_IPI);
271 while (!all_threads_paused())
272 kvm_eat_signals(vcpu_env, 0);
275 static void resume_other_threads(void)
277 int i;
279 for (i = 1; i < smp_cpus; ++i) {
280 vcpu_info[i].stop = 0;
281 vcpu_info[i].stopped = 0;
282 pthread_kill(vcpu_info[i].thread, SIG_IPI);
/* vm-state hook: keep AP vcpu threads in step with stop/cont. */
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (running)
        resume_other_threads();
    else
        pause_other_threads();
}
294 static void update_regs_for_sipi(CPUState *env)
296 kvm_arch_update_regs_for_sipi(env);
297 vcpu_info[env->cpu_index].sipi_needed = 0;
298 vcpu_info[env->cpu_index].init = 0;
301 static void update_regs_for_init(CPUState *env)
303 cpu_reset(env);
304 kvm_arch_load_regs(env);
307 static void setup_kernel_sigmask(CPUState *env)
309 sigset_t set;
311 sigprocmask(SIG_BLOCK, NULL, &set);
312 sigdelset(&set, SIG_IPI);
313 if (env->cpu_index == 0)
314 sigandset(&set, &set, &io_negsigset);
316 kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
319 static int kvm_main_loop_cpu(CPUState *env)
321 struct vcpu_info *info = &vcpu_info[env->cpu_index];
323 setup_kernel_sigmask(env);
324 pthread_mutex_lock(&qemu_mutex);
326 kvm_qemu_init_env(env);
327 env->ready_for_interrupt_injection = 1;
329 cpu_single_env = env;
330 while (1) {
331 while (!has_work(env))
332 kvm_main_loop_wait(env, 10);
333 if (env->interrupt_request & CPU_INTERRUPT_HARD)
334 env->hflags &= ~HF_HALTED_MASK;
335 if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
336 update_regs_for_sipi(env);
337 if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
338 update_regs_for_init(env);
339 if (!(env->hflags & HF_HALTED_MASK) && !info->init)
340 kvm_cpu_exec(env);
341 env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
342 kvm_main_loop_wait(env, 0);
343 if (qemu_shutdown_requested())
344 break;
345 else if (qemu_powerdown_requested())
346 qemu_system_powerdown();
347 else if (qemu_reset_requested()) {
348 env->interrupt_request = 0;
349 qemu_system_reset();
350 kvm_arch_load_regs(env);
353 pthread_mutex_unlock(&qemu_mutex);
354 return 0;
357 static void *ap_main_loop(void *_env)
359 CPUState *env = _env;
360 sigset_t signals;
362 vcpu_env = env;
363 sigfillset(&signals);
364 //sigdelset(&signals, SIG_IPI);
365 sigprocmask(SIG_BLOCK, &signals, NULL);
366 kvm_create_vcpu(kvm_context, env->cpu_index);
367 kvm_qemu_init_env(env);
368 if (kvm_irqchip_in_kernel(kvm_context))
369 env->hflags &= ~HF_HALTED_MASK;
370 kvm_main_loop_cpu(env);
371 return NULL;
374 static void kvm_add_signal(int signum)
376 sigaddset(&io_sigset, signum);
377 sigdelset(&io_negsigset, signum);
378 sigprocmask(SIG_BLOCK, &io_sigset, NULL);
381 int kvm_init_ap(void)
383 CPUState *env = first_cpu->next_cpu;
384 int i;
386 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
387 sigemptyset(&io_sigset);
388 sigfillset(&io_negsigset);
389 kvm_add_signal(SIGIO);
390 kvm_add_signal(SIGALRM);
391 kvm_add_signal(SIGUSR2);
392 if (!kvm_irqchip_in_kernel(kvm_context))
393 kvm_add_signal(SIG_IPI);
395 vcpu_env = first_cpu;
396 signal(SIG_IPI, sig_ipi_handler);
397 for (i = 1; i < smp_cpus; ++i) {
398 pthread_create(&vcpu_info[i].thread, NULL, ap_main_loop, env);
399 env = env->next_cpu;
401 return 0;
404 int kvm_main_loop(void)
406 vcpu_info[0].thread = pthread_self();
407 return kvm_main_loop_cpu(first_cpu);
410 static int kvm_debug(void *opaque, int vcpu)
412 CPUState *env = cpu_single_env;
414 env->exception_index = EXCP_DEBUG;
415 return 1;
/* Port-io read (8-bit): forward to qemu's ioport emulation. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    *data = cpu_inb(0, addr);
    return 0;
}
/* Port-io read (16-bit): forward to qemu's ioport emulation. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    *data = cpu_inw(0, addr);
    return 0;
}
/* Port-io read (32-bit): forward to qemu's ioport emulation. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    *data = cpu_inl(0, addr);
    return 0;
}
#define PM_IO_BASE 0xb000

/*
 * Port-io write (8-bit).  Port 0xb2 (the SMI command port) gets special
 * handling: command 0 echoes to 0xb3, 0xf0 clears and 0xf1 sets bit 0
 * of the PM control register at PM_IO_BASE+4 (ACPI off/on).  All other
 * writes go straight to qemu's ioport emulation.
 */
static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    unsigned pm;

    if (addr != 0xb2) {
        cpu_outb(0, addr, data);
        return 0;
    }

    switch (data) {
    case 0:
        cpu_outb(0, 0xb3, 0);
        break;
    case 0xf0:
        /* disable acpi: clear bit 0 of the PM control register */
        pm = cpu_inw(0, PM_IO_BASE + 4);
        pm &= ~1;
        cpu_outw(0, PM_IO_BASE + 4, pm);
        break;
    case 0xf1:
        /* enable acpi: set bit 0 of the PM control register */
        pm = cpu_inw(0, PM_IO_BASE + 4);
        pm |= 1;
        cpu_outw(0, PM_IO_BASE + 4, pm);
        break;
    default:
        break;
    }
    return 0;
}
/* Port-io write (16-bit): forward to qemu's ioport emulation. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);
    return 0;
}
/* Port-io write (32-bit): forward to qemu's ioport emulation. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);
    return 0;
}
/* MMIO read (8-bit): forward to qemu's physical memory layer. */
static int kvm_readb(void *opaque, uint64_t addr, uint8_t *data)
{
    *data = ldub_phys(addr);
    return 0;
}
/* MMIO read (16-bit): forward to qemu's physical memory layer. */
static int kvm_readw(void *opaque, uint64_t addr, uint16_t *data)
{
    *data = lduw_phys(addr);
    return 0;
}
/* MMIO read (32-bit): forward to qemu's physical memory layer. */
static int kvm_readl(void *opaque, uint64_t addr, uint32_t *data)
{
    /* hack: Red Hat 7.1 generates some weird accesses straddling the
     * end of the VGA window at 0xa0000; answer them with zero. */
    if (addr > 0xa0000 - 4 && addr < 0xa0000) {
        *data = 0;
        return 0;
    }

    *data = ldl_phys(addr);
    return 0;
}
/* MMIO read (64-bit): forward to qemu's physical memory layer. */
static int kvm_readq(void *opaque, uint64_t addr, uint64_t *data)
{
    *data = ldq_phys(addr);
    return 0;
}
/* MMIO write (8-bit): forward to qemu's physical memory layer. */
static int kvm_writeb(void *opaque, uint64_t addr, uint8_t data)
{
    stb_phys(addr, data);
    return 0;
}
/* MMIO write (16-bit): forward to qemu's physical memory layer. */
static int kvm_writew(void *opaque, uint64_t addr, uint16_t data)
{
    stw_phys(addr, data);
    return 0;
}
/* MMIO write (32-bit): forward to qemu's physical memory layer. */
static int kvm_writel(void *opaque, uint64_t addr, uint32_t data)
{
    stl_phys(addr, data);
    return 0;
}
/* MMIO write (64-bit): forward to qemu's physical memory layer. */
static int kvm_writeq(void *opaque, uint64_t addr, uint64_t data)
{
    stq_phys(addr, data);
    return 0;
}
/* libkvm callback: always exit to userspace when the io window opens. */
static int kvm_io_window(void *opaque)
{
    return 1;
}
/* libkvm callback: guest executed HLT; defer to arch handling. */
static int kvm_halt(void *opaque, int vcpu)
{
    return kvm_arch_halt(opaque, vcpu);
}
/* libkvm callback: guest triple-faulted or shut down -- reset the vm. */
static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();
    return 1;
}
556 static struct kvm_callbacks qemu_kvm_ops = {
557 .debug = kvm_debug,
558 .inb = kvm_inb,
559 .inw = kvm_inw,
560 .inl = kvm_inl,
561 .outb = kvm_outb,
562 .outw = kvm_outw,
563 .outl = kvm_outl,
564 .readb = kvm_readb,
565 .readw = kvm_readw,
566 .readl = kvm_readl,
567 .readq = kvm_readq,
568 .writeb = kvm_writeb,
569 .writew = kvm_writew,
570 .writel = kvm_writel,
571 .writeq = kvm_writeq,
572 .halt = kvm_halt,
573 .shutdown = kvm_shutdown,
574 .io_window = kvm_io_window,
575 .try_push_interrupts = try_push_interrupts,
576 .post_kvm_run = post_kvm_run,
577 .pre_kvm_run = pre_kvm_run,
580 int kvm_qemu_init()
582 /* Try to initialize kvm */
583 kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
584 if (!kvm_context) {
585 return -1;
588 return 0;
591 int kvm_qemu_create_context(void)
593 int r;
594 if (!kvm_irqchip) {
595 kvm_disable_irqchip_creation(kvm_context);
597 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
598 kvm_qemu_destroy();
599 return -1;
601 r = kvm_arch_qemu_create_context();
602 if(r <0)
603 kvm_qemu_destroy();
604 return 0;
607 void kvm_qemu_destroy(void)
609 kvm_finalize(kvm_context);
612 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
613 unsigned long size,
614 unsigned long phys_offset)
616 #ifdef KVM_CAP_USER_MEMORY
617 int r = 0;
619 r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
620 if (r) {
621 if (!(phys_offset & ~TARGET_PAGE_MASK)) {
622 r = kvm_is_allocated_mem(kvm_context, start_addr, size);
623 if (r)
624 return;
625 r = kvm_is_intersecting_mem(kvm_context, start_addr);
626 if (r)
627 kvm_create_mem_hole(kvm_context, start_addr, size);
628 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
629 phys_ram_base + phys_offset,
630 size, 0);
632 if (phys_offset & IO_MEM_ROM) {
633 phys_offset &= ~IO_MEM_ROM;
634 r = kvm_is_intersecting_mem(kvm_context, start_addr);
635 if (r)
636 kvm_create_mem_hole(kvm_context, start_addr, size);
637 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
638 phys_ram_base + phys_offset,
639 size, 0);
641 if (r < 0) {
642 printf("kvm_cpu_register_physical_memory: failed\n");
643 exit(1);
645 return;
647 #endif
648 if (phys_offset & IO_MEM_ROM) {
649 phys_offset &= ~IO_MEM_ROM;
650 memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
654 int kvm_qemu_check_extension(int ext)
656 return kvm_check_extension(kvm_context, ext);
659 int kvm_qemu_init_env(CPUState *cenv)
661 return kvm_arch_qemu_init_env(cenv);
664 int kvm_update_debugger(CPUState *env)
666 struct kvm_debug_guest dbg;
667 int i;
669 dbg.enabled = 0;
670 if (env->nb_breakpoints || env->singlestep_enabled) {
671 dbg.enabled = 1;
672 for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
673 dbg.breakpoints[i].enabled = 1;
674 dbg.breakpoints[i].address = env->breakpoints[i];
676 dbg.singlestep = env->singlestep_enabled;
678 return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
683 * dirty pages logging
685 /* FIXME: use unsigned long pointer instead of unsigned char */
686 unsigned char *kvm_dirty_bitmap = NULL;
687 int kvm_physical_memory_set_dirty_tracking(int enable)
689 int r = 0;
691 if (!kvm_allowed)
692 return 0;
694 if (enable) {
695 if (!kvm_dirty_bitmap) {
696 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
697 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
698 if (kvm_dirty_bitmap == NULL) {
699 perror("Failed to allocate dirty pages bitmap");
700 r=-1;
702 else {
703 r = kvm_dirty_pages_log_enable_all(kvm_context);
707 else {
708 if (kvm_dirty_bitmap) {
709 r = kvm_dirty_pages_log_reset(kvm_context);
710 qemu_free(kvm_dirty_bitmap);
711 kvm_dirty_bitmap = NULL;
714 return r;
717 /* get kvm's dirty pages bitmap and update qemu's */
718 int kvm_get_dirty_pages_log_range(unsigned long start_addr,
719 unsigned char *bitmap,
720 unsigned int offset,
721 unsigned long mem_size)
723 unsigned int i, j, n=0;
724 unsigned char c;
725 unsigned page_number, addr, addr1;
726 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
729 * bitmap-traveling is faster than memory-traveling (for addr...)
730 * especially when most of the memory is not dirty.
732 for (i=0; i<len; i++) {
733 c = bitmap[i];
734 while (c>0) {
735 j = ffsl(c) - 1;
736 c &= ~(1u<<j);
737 page_number = i * 8 + j;
738 addr1 = page_number * TARGET_PAGE_SIZE;
739 addr = offset + addr1;
740 cpu_physical_memory_set_dirty(addr);
741 n++;
744 return 0;
/*
 * Callback for kvm_get_dirty_pages_range(): a slot's start address
 * doubles as the offset into guest physical memory.
 */
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}
753 * get kvm's dirty pages bitmap and update qemu's
754 * we only care about physical ram, which resides in slots 0 and 3
756 int kvm_update_dirty_pages_log(void)
758 int r = 0;
761 r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
762 kvm_dirty_bitmap, NULL,
763 kvm_get_dirty_bitmap_cb);
764 return r;
767 int kvm_get_phys_ram_bitmap_cb(unsigned long start, unsigned long len,
768 void *local_bitmap, void *qemu_bitmap)
770 unsigned int bsize = ((len/TARGET_PAGE_SIZE) + 7) / 8;
771 unsigned int offset = ((start/TARGET_PAGE_SIZE) + 7) / 8;
773 memcpy(qemu_bitmap + offset, local_bitmap, bsize);
775 return 0;
778 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
780 int r=0;
781 void *local_bitmap;
782 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
784 local_bitmap = qemu_malloc(bsize);
785 if (!local_bitmap) {
786 fprintf(stderr, "could not allocate memory for phys_page bitmap\n");
787 return 1;
790 r = kvm_get_mem_map_range(kvm_context, 0, phys_ram_size,
791 local_bitmap, bitmap,
792 kvm_get_phys_ram_bitmap_cb);
794 qemu_free(local_bitmap);
795 return r;
798 #ifdef KVM_CAP_IRQCHIP
800 int kvm_set_irq(int irq, int level)
802 return kvm_set_irq_level(kvm_context, irq, level);
805 #endif
807 #endif