/*
 * qemu-kvm.c — from qemu-kvm/fedora.git
 * Commit: "Allow SIG_IPI with kernel irqchip"
 * blob a58a16b1ce2c8d64aaf325fb0b019af6cf5544e1
 */
/*
 * qemu-kvm glue: vcpu threading, signal plumbing, and libkvm callbacks.
 * NOTE(review): this listing carries embedded original line numbers and is
 * missing blank/brace-only lines dropped by the export that produced it.
 */
2 #include "config.h"
3 #include "config-host.h"
/* KVM defaults to enabled only when the binary was built with USE_KVM. */
5 #ifdef USE_KVM
6 #define KVM_ALLOWED_DEFAULT 1
7 #else
8 #define KVM_ALLOWED_DEFAULT 0
9 #endif
11 int kvm_allowed = KVM_ALLOWED_DEFAULT;
/* Nonzero selects the in-kernel irqchip; cleared to force the userspace model. */
12 int kvm_irqchip = 1;
14 #ifdef USE_KVM
16 #include <string.h>
17 #include "hw/hw.h"
18 #include "sysemu.h"
20 #include "qemu-kvm.h"
21 #include <libkvm.h>
22 #include <pthread.h>
23 #include <sys/utsname.h>
25 extern void perror(const char *s);
/* Global libkvm handle for the single VM of this process. */
27 kvm_context_t kvm_context;
29 extern int smp_cpus;
/* Big lock serializing qemu device emulation across vcpu threads. */
31 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
32 pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
/* Per-thread pointer to the CPUState owned by the current vcpu thread. */
33 __thread CPUState *vcpu_env;
/* Signals serviced by the I/O (BSP) thread, and their complement. */
35 static sigset_t io_sigset, io_negsigset;
/* Inter-vcpu "kick" signal; a real-time signal so deliveries are queued. */
37 #define SIG_IPI (SIGRTMIN+4)
/* Per-vcpu bookkeeping, indexed by cpu_index.
 * NOTE(review): fixed size 4 caps smp_cpus — confirm against MAX_VCPUS. */
39 struct vcpu_info {
40 int sipi_needed;
41 int init;
42 pthread_t thread;
43 int signalled;
44 int stop;
45 int stopped;
46 } vcpu_info[4];
/* SIG_IPI handler.  Body not visible in this excerpt — presumably a no-op,
 * since SIG_IPI exists only to kick a vcpu out of KVM_RUN; TODO confirm
 * against the full source. */
48 static void sig_ipi_handler(int n)
52 void kvm_update_interrupt_request(CPUState *env)
54 if (env && env != vcpu_env) {
55 if (vcpu_info[env->cpu_index].signalled)
56 return;
57 vcpu_info[env->cpu_index].signalled = 1;
58 if (vcpu_info[env->cpu_index].thread)
59 pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);
63 void kvm_update_after_sipi(CPUState *env)
65 vcpu_info[env->cpu_index].sipi_needed = 1;
66 kvm_update_interrupt_request(env);
69 void kvm_apic_init(CPUState *env)
71 if (env->cpu_index != 0)
72 vcpu_info[env->cpu_index].init = 1;
73 kvm_update_interrupt_request(env);
76 #include <signal.h>
/* libkvm callback: ask the arch layer to inject any pending interrupts. */
static int try_push_interrupts(void *opaque)
{
    return kvm_arch_try_push_interrupts(opaque);
}
83 static void post_kvm_run(void *opaque, int vcpu)
86 pthread_mutex_lock(&qemu_mutex);
87 kvm_arch_post_kvm_run(opaque, vcpu);
90 static int pre_kvm_run(void *opaque, int vcpu)
92 CPUState *env = cpu_single_env;
94 kvm_arch_pre_kvm_run(opaque, vcpu);
96 if (env->interrupt_request & CPU_INTERRUPT_EXIT)
97 return 1;
98 pthread_mutex_unlock(&qemu_mutex);
99 return 0;
102 void kvm_load_registers(CPUState *env)
104 if (kvm_allowed)
105 kvm_arch_load_regs(env);
108 void kvm_save_registers(CPUState *env)
110 if (kvm_allowed)
111 kvm_arch_save_regs(env);
114 int kvm_cpu_exec(CPUState *env)
116 int r;
118 r = kvm_run(kvm_context, env->cpu_index);
119 if (r < 0) {
120 printf("kvm_run returned %d\n", r);
121 exit(1);
124 return 0;
127 extern int vm_running;
129 static int has_work(CPUState *env)
131 if (!vm_running)
132 return 0;
133 if (!(env->hflags & HF_HALTED_MASK))
134 return 1;
135 return kvm_arch_has_work(env);
/*
 * Wait up to `timeout` milliseconds for one of the I/O signals
 * (io_sigset) and dispatch its registered handler under qemu_mutex.
 * Returns 1 if a signal was consumed, 0 on timeout.  Exits the process
 * on an unexpected sigtimedwait() error.
 */
138 static int kvm_eat_signal(CPUState *env, int timeout)
140 struct timespec ts;
141 int r, e, ret = 0;
142 siginfo_t siginfo;
143 struct sigaction sa;
/* Split the millisecond timeout into sec/nsec for sigtimedwait(). */
145 ts.tv_sec = timeout / 1000;
146 ts.tv_nsec = (timeout % 1000) * 1000000;
147 r = sigtimedwait(&io_sigset, &siginfo, &ts);
/* Fast path: pure poll (timeout == 0) with nothing pending. */
148 if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
149 return 0;
/* Save errno before pthread_mutex_lock() can clobber it. */
150 e = errno;
151 pthread_mutex_lock(&qemu_mutex);
152 cpu_single_env = vcpu_env;
153 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
154 printf("sigtimedwait: %s\n", strerror(e));
155 exit(1);
157 if (r != -1) {
/* The signal was collected synchronously, so invoke the handler that
 * would have run on asynchronous delivery by hand. */
158 sigaction(siginfo.si_signo, NULL, &sa);
159 sa.sa_handler(siginfo.si_signo);
/* SIGUSR2 wakes AP threads blocked in qemu_kvm_aio_wait(). */
160 if (siginfo.si_signo == SIGUSR2)
161 pthread_cond_signal(&qemu_aio_cond);
162 ret = 1;
164 pthread_mutex_unlock(&qemu_mutex);
166 return ret;
/*
 * Drain all pending I/O signals; if none were pending and a timeout was
 * given, block once for up to `timeout` ms and then drain again.
 * Finishes with one main_loop_wait(0) pass under qemu_mutex.
 */
170 static void kvm_eat_signals(CPUState *env, int timeout)
172 int r = 0;
174 while (kvm_eat_signal(env, 0))
175 r = 1;
176 if (!r && timeout) {
177 r = kvm_eat_signal(env, timeout);
178 if (r)
179 while (kvm_eat_signal(env, 0))
/*
 * We call select() (via main_loop_wait) even if no signal was received,
 * to account for fd events for which there is no signal handler
 * installed.
 */
186 pthread_mutex_lock(&qemu_mutex);
187 cpu_single_env = vcpu_env;
188 main_loop_wait(0);
189 pthread_mutex_unlock(&qemu_mutex);
/*
 * Per-vcpu wait/housekeeping step, entered with qemu_mutex held.
 * CPU 0 (the I/O thread) drains I/O signals; AP vcpus wait for SIG_IPI.
 * Also implements the pause protocol: when ->stop is set, acknowledge
 * by setting ->stopped, kick CPU 0, and park in sigwait() until the
 * next SIG_IPI (resume).  Returns with qemu_mutex re-held.
 */
192 static void kvm_main_loop_wait(CPUState *env, int timeout)
194 pthread_mutex_unlock(&qemu_mutex);
195 if (env->cpu_index == 0)
196 kvm_eat_signals(env, timeout);
197 else {
/* Without the in-kernel irqchip, a blocking wait for SIG_IPI is safe
 * (interrupts are injected from userspace via signals). */
198 if (!kvm_irqchip_in_kernel(kvm_context) &&
199 (timeout || vcpu_info[env->cpu_index].stopped)) {
200 sigset_t set;
201 int n;
/* Park here while paused; resume_other_threads() sends SIG_IPI. */
203 paused:
204 sigemptyset(&set);
205 sigaddset(&set, SIG_IPI);
206 sigwait(&set, &n);
207 } else {
/* In-kernel irqchip (or no timeout): just consume any queued SIG_IPI
 * without blocking (zero timespec). */
208 struct timespec ts;
209 siginfo_t siginfo;
210 sigset_t set;
212 ts.tv_sec = 0;
213 ts.tv_nsec = 0;
214 sigemptyset(&set);
215 sigaddset(&set, SIG_IPI);
216 sigtimedwait(&set, &siginfo, &ts);
/* Pause request: acknowledge and notify the I/O thread, then park. */
218 if (vcpu_info[env->cpu_index].stop) {
219 vcpu_info[env->cpu_index].stop = 0;
220 vcpu_info[env->cpu_index].stopped = 1;
221 pthread_kill(vcpu_info[0].thread, SIG_IPI);
222 goto paused;
225 pthread_mutex_lock(&qemu_mutex);
226 cpu_single_env = env;
/* Allow the next kvm_update_interrupt_request() to signal us again. */
227 vcpu_info[env->cpu_index].signalled = 0;
230 static int all_threads_paused(void)
232 int i;
234 for (i = 1; i < smp_cpus; ++i)
235 if (vcpu_info[i].stopped)
236 return 0;
237 return 1;
240 static void pause_other_threads(void)
242 int i;
244 for (i = 1; i < smp_cpus; ++i) {
245 vcpu_info[i].stop = 1;
246 pthread_kill(vcpu_info[i].thread, SIG_IPI);
248 while (!all_threads_paused())
249 kvm_eat_signals(vcpu_env, 0);
252 static void resume_other_threads(void)
254 int i;
256 for (i = 1; i < smp_cpus; ++i) {
257 vcpu_info[i].stop = 0;
258 vcpu_info[i].stopped = 0;
259 pthread_kill(vcpu_info[i].thread, SIG_IPI);
/* VM run-state hook: pause AP vcpu threads when the VM stops, resume
 * them when it starts running again. */
static void kvm_vm_state_change_handler(void *context, int running)
{
    running ? resume_other_threads() : pause_other_threads();
}
271 static void update_regs_for_sipi(CPUState *env)
273 kvm_arch_update_regs_for_sipi(env);
274 vcpu_info[env->cpu_index].sipi_needed = 0;
275 vcpu_info[env->cpu_index].init = 0;
278 static void update_regs_for_init(CPUState *env)
280 cpu_reset(env);
281 kvm_arch_load_regs(env);
284 static void setup_kernel_sigmask(CPUState *env)
286 sigset_t set;
288 sigprocmask(SIG_BLOCK, NULL, &set);
289 sigdelset(&set, SIG_IPI);
290 if (env->cpu_index == 0)
291 sigandset(&set, &set, &io_negsigset);
293 kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
/*
 * Per-vcpu execution loop: wait for work, service SIPI/INIT (userspace
 * irqchip only), run the guest, then handle shutdown/powerdown/reset
 * requests.  Runs with qemu_mutex held except inside
 * kvm_main_loop_wait() and guest execution.
 */
296 static int kvm_main_loop_cpu(CPUState *env)
298 struct vcpu_info *info = &vcpu_info[env->cpu_index];
300 setup_kernel_sigmask(env);
301 pthread_mutex_lock(&qemu_mutex);
303 kvm_qemu_init_env(env);
304 env->ready_for_interrupt_injection = 1;
306 cpu_single_env = env;
307 #ifdef TARGET_I386
308 kvm_tpr_opt_setup(env);
309 #endif
310 while (1) {
/* Idle: poll signals every 10 ms until this vcpu has work. */
311 while (!has_work(env))
312 kvm_main_loop_wait(env, 10);
/* A pending hard interrupt un-halts the vcpu. */
313 if (env->interrupt_request & CPU_INTERRUPT_HARD)
314 env->hflags &= ~HF_HALTED_MASK;
/* SIPI/INIT are emulated here only without the in-kernel irqchip;
 * otherwise the kernel APIC handles them. */
315 if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
316 update_regs_for_sipi(env);
317 if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
318 update_regs_for_init(env);
319 if (!(env->hflags & HF_HALTED_MASK) && !info->init)
320 kvm_cpu_exec(env);
321 env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
322 kvm_main_loop_wait(env, 0);
323 if (qemu_shutdown_requested())
324 break;
325 else if (qemu_powerdown_requested())
326 qemu_system_powerdown();
327 else if (qemu_reset_requested()) {
328 env->interrupt_request = 0;
329 qemu_system_reset();
330 kvm_arch_load_regs(env);
333 pthread_mutex_unlock(&qemu_mutex);
334 return 0;
/*
 * Thread entry point for AP (secondary) vcpus.  All signals are blocked
 * in the thread — SIG_IPI is collected synchronously via
 * sigwait()/sigtimedwait() in kvm_main_loop_wait() rather than through
 * a handler.  Creates the kernel vcpu, then enters the shared loop.
 */
337 static void *ap_main_loop(void *_env)
339 CPUState *env = _env;
340 sigset_t signals;
342 vcpu_env = env;
343 sigfillset(&signals);
344 //sigdelset(&signals, SIG_IPI);
345 sigprocmask(SIG_BLOCK, &signals, NULL);
346 kvm_create_vcpu(kvm_context, env->cpu_index);
347 kvm_qemu_init_env(env);
/* With the in-kernel irqchip APs start unhalted; the kernel APIC
 * delivers INIT/SIPI itself. */
348 if (kvm_irqchip_in_kernel(kvm_context))
349 env->hflags &= ~HF_HALTED_MASK;
350 kvm_main_loop_cpu(env);
351 return NULL;
354 static void kvm_add_signal(int signum)
356 sigaddset(&io_sigset, signum);
357 sigdelset(&io_negsigset, signum);
358 sigprocmask(SIG_BLOCK, &io_sigset, NULL);
/*
 * Set up the I/O-thread signal set and spawn one pthread per AP vcpu.
 * NOTE(review): vcpu_info[] is sized 4 — smp_cpus > 4 would overflow it;
 * confirm the configured maximum vcpu count.
 */
361 int kvm_init_ap(void)
363 CPUState *env = first_cpu->next_cpu;
364 int i;
366 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
/* Signals serviced synchronously by the BSP/I/O thread. */
367 sigemptyset(&io_sigset);
368 sigfillset(&io_negsigset);
369 kvm_add_signal(SIGIO);
370 kvm_add_signal(SIGALRM);
371 kvm_add_signal(SIGUSR2);
372 kvm_add_signal(SIG_IPI);
374 vcpu_env = first_cpu;
/* Handler installed so kvm_eat_signal() has something to invoke. */
375 signal(SIG_IPI, sig_ipi_handler);
376 for (i = 1; i < smp_cpus; ++i) {
377 pthread_create(&vcpu_info[i].thread, NULL, ap_main_loop, env);
378 env = env->next_cpu;
380 return 0;
383 int kvm_main_loop(void)
385 vcpu_info[0].thread = pthread_self();
386 pthread_mutex_unlock(&qemu_mutex);
387 return kvm_main_loop_cpu(first_cpu);
390 static int kvm_debug(void *opaque, int vcpu)
392 CPUState *env = cpu_single_env;
394 env->exception_index = EXCP_DEBUG;
395 return 1;
/* PIO read callbacks: forward byte/word/long port reads to qemu's
 * ioport dispatch. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    *data = cpu_inb(0, addr);
    return 0;
}

static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    *data = cpu_inw(0, addr);
    return 0;
}

static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    *data = cpu_inl(0, addr);
    return 0;
}
/* Power-management I/O base; PM_IO_BASE + 4 is the PM1a control port
 * (SCI_EN in bit 0).  NOTE(review): value hard-codes the BIOS's PMBA. */
416 #define PM_IO_BASE 0xb000
/*
 * PIO write callback (byte).  Port 0xb2 is the ACPI SMI command port;
 * qemu has no SMM, so the SMI commands are emulated inline here.
 */
418 static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
420 if (addr == 0xb2) {
421 switch (data) {
422 case 0: {
423 cpu_outb(0, 0xb3, 0);
424 break;
426 case 0xf0: {
427 unsigned x;
/* NOTE(review): despite the original comment, this CLEARS bit 0
 * (SCI_EN) — i.e. it disables ACPI; the 0xf1 case enables it. */
429 /* enable acpi */
430 x = cpu_inw(0, PM_IO_BASE + 4);
431 x &= ~1;
432 cpu_outw(0, PM_IO_BASE + 4, x);
433 break;
435 case 0xf1: {
436 unsigned x;
438 /* enable acpi */
439 x = cpu_inw(0, PM_IO_BASE + 4);
440 x |= 1;
441 cpu_outw(0, PM_IO_BASE + 4, x);
442 break;
444 default:
445 break;
447 return 0;
/* All other ports go through qemu's normal ioport dispatch. */
449 cpu_outb(0, addr, data);
450 return 0;
/* PIO write callbacks (word/long): forward to qemu's ioport dispatch. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);
    return 0;
}

static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);
    return 0;
}
/* MMIO callbacks: route guest-physical accesses through qemu's memory
 * dispatch (last argument selects read=0 / write=1). */
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 0);
    return 0;
}

static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 1);
    return 0;
}
/* Always return to userspace when libkvm reports an I/O window. */
static int kvm_io_window(void *opaque)
{
    return 1;
}
/* Guest executed HLT: defer to the arch layer's halt policy. */
static int kvm_halt(void *opaque, int vcpu)
{
    return kvm_arch_halt(opaque, vcpu);
}

/* Guest triple fault: request a system reset instead of killing qemu. */
static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();
    return 1;
}
/* Callback table handed to libkvm: routes guest PIO/MMIO/debug/halt
 * exits back into qemu's device emulation. */
494 static struct kvm_callbacks qemu_kvm_ops = {
495 .debug = kvm_debug,
496 .inb = kvm_inb,
497 .inw = kvm_inw,
498 .inl = kvm_inl,
499 .outb = kvm_outb,
500 .outw = kvm_outw,
501 .outl = kvm_outl,
502 .mmio_read = kvm_mmio_read,
503 .mmio_write = kvm_mmio_write,
504 .halt = kvm_halt,
505 .shutdown = kvm_shutdown,
506 .io_window = kvm_io_window,
507 .try_push_interrupts = try_push_interrupts,
508 .post_kvm_run = post_kvm_run,
509 .pre_kvm_run = pre_kvm_run,
510 #ifdef TARGET_I386
511 .tpr_access = handle_tpr_access,
512 #endif
513 #ifdef TARGET_PPC
514 .powerpc_dcr_read = handle_powerpc_dcr_read,
515 .powerpc_dcr_write = handle_powerpc_dcr_write,
516 #endif
519 int kvm_qemu_init()
521 /* Try to initialize kvm */
522 kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
523 if (!kvm_context) {
524 return -1;
526 pthread_mutex_lock(&qemu_mutex);
528 return 0;
531 int kvm_qemu_create_context(void)
533 int r;
534 if (!kvm_irqchip) {
535 kvm_disable_irqchip_creation(kvm_context);
537 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
538 kvm_qemu_destroy();
539 return -1;
541 r = kvm_arch_qemu_create_context();
542 if(r <0)
543 kvm_qemu_destroy();
544 return 0;
547 void kvm_qemu_destroy(void)
549 kvm_finalize(kvm_context);
/*
 * Register a guest-physical memory range with kvm.  With
 * KVM_CAP_USER_MEMORY the range is backed by qemu's phys_ram_base
 * allocation; without it, ROM contents are simply copied into the
 * kernel-allocated guest memory.
 */
552 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
553 unsigned long size,
554 unsigned long phys_offset)
556 #ifdef KVM_CAP_USER_MEMORY
557 int r = 0;
559 r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
560 if (r) {
/* Plain RAM: no flag bits set in phys_offset. */
561 if (!(phys_offset & ~TARGET_PAGE_MASK)) {
562 r = kvm_is_allocated_mem(kvm_context, start_addr, size);
563 if (r)
564 return;
/* Punch a hole in any overlapping slot before registering. */
565 r = kvm_is_intersecting_mem(kvm_context, start_addr);
566 if (r)
567 kvm_create_mem_hole(kvm_context, start_addr, size);
568 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
569 phys_ram_base + phys_offset,
570 size, 0);
/* ROM range: strip the flag and register the backing pages.
 * NOTE(review): registered with readonly=0 — confirm ROM
 * write-protection is enforced elsewhere. */
572 if (phys_offset & IO_MEM_ROM) {
573 phys_offset &= ~IO_MEM_ROM;
574 r = kvm_is_intersecting_mem(kvm_context, start_addr);
575 if (r)
576 kvm_create_mem_hole(kvm_context, start_addr, size);
577 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
578 phys_ram_base + phys_offset,
579 size, 0);
581 if (r < 0) {
582 printf("kvm_cpu_register_physical_memory: failed\n");
583 exit(1);
585 return;
587 #endif
/* Fallback (no user-memory capability): copy ROM data into place. */
588 if (phys_offset & IO_MEM_ROM) {
589 phys_offset &= ~IO_MEM_ROM;
590 memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
594 int kvm_qemu_check_extension(int ext)
596 return kvm_check_extension(kvm_context, ext);
599 int kvm_qemu_init_env(CPUState *cenv)
601 return kvm_arch_qemu_init_env(cenv);
604 int kvm_update_debugger(CPUState *env)
606 struct kvm_debug_guest dbg;
607 int i;
609 dbg.enabled = 0;
610 if (env->nb_breakpoints || env->singlestep_enabled) {
611 dbg.enabled = 1;
612 for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
613 dbg.breakpoints[i].enabled = 1;
614 dbg.breakpoints[i].address = env->breakpoints[i];
616 dbg.singlestep = env->singlestep_enabled;
618 return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
/*
 * dirty pages logging
 */
/* FIXME: use unsigned long pointer instead of unsigned char */
/* Scratch buffer for kvm's dirty-page log; allocated on demand by
 * kvm_physical_memory_set_dirty_tracking(). */
626 unsigned char *kvm_dirty_bitmap = NULL;
/*
 * Enable/disable dirty-page tracking for the whole guest.  Allocates
 * (or frees) the shadow bitmap and toggles kvm's dirty logging.
 * Returns 0 on success, -1 when the bitmap allocation fails.
 */
627 int kvm_physical_memory_set_dirty_tracking(int enable)
629 int r = 0;
631 if (!kvm_allowed)
632 return 0;
634 if (enable) {
635 if (!kvm_dirty_bitmap) {
636 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
637 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
638 if (kvm_dirty_bitmap == NULL) {
639 perror("Failed to allocate dirty pages bitmap");
640 r=-1;
642 else {
643 r = kvm_dirty_pages_log_enable_all(kvm_context);
647 else {
648 if (kvm_dirty_bitmap) {
649 r = kvm_dirty_pages_log_reset(kvm_context);
650 qemu_free(kvm_dirty_bitmap);
651 kvm_dirty_bitmap = NULL;
654 return r;
/* get kvm's dirty pages bitmap and update qemu's */
657 /* get kvm's dirty pages bitmap and update qemu's */
658 int kvm_get_dirty_pages_log_range(unsigned long start_addr,
659 unsigned char *bitmap,
660 unsigned int offset,
661 unsigned long mem_size)
663 unsigned int i, j, n=0;
664 unsigned char c;
665 unsigned page_number, addr, addr1;
/* One bit per page, rounded up to whole bytes. */
666 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
/*
 * bitmap-traveling is faster than memory-traveling (for addr...),
 * especially when most of the memory is not dirty.
 */
672 for (i=0; i<len; i++) {
673 c = bitmap[i];
/* Peel off set bits one at a time; ffsl() finds the lowest. */
674 while (c>0) {
675 j = ffsl(c) - 1;
676 c &= ~(1u<<j);
677 page_number = i * 8 + j;
678 addr1 = page_number * TARGET_PAGE_SIZE;
679 addr = offset + addr1;
680 cpu_physical_memory_set_dirty(addr);
681 n++;
684 return 0;
/* libkvm per-slot callback: fold one slot's dirty bitmap into qemu's
 * dirty page state (slot start doubles as the qemu offset). */
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}
693 * get kvm's dirty pages bitmap and update qemu's
694 * we only care about physical ram, which resides in slots 0 and 3
696 int kvm_update_dirty_pages_log(void)
698 int r = 0;
701 r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
702 kvm_dirty_bitmap, NULL,
703 kvm_get_dirty_bitmap_cb);
704 return r;
/*
 * Build a page-presence bitmap for the guest's physical address space:
 * all RAM pages set, except the legacy VGA/BIOS hole (0xa0000-0xc0000),
 * plus the extra (above-ram_size) pages; the remainder is cleared.
 * NOTE(review): caller must supply a buffer of BITMAP_SIZE(phys_ram_size)
 * bytes — confirm at the call site.
 */
707 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
709 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
710 unsigned int brsize = BITMAP_SIZE(ram_size);
711 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
712 unsigned int extra_bytes = (extra_pages +7)/8;
/* Byte offsets of the 0xa0000-0xc0000 hole within the bitmap. */
713 unsigned int hole_start = BITMAP_SIZE(0xa0000);
714 unsigned int hole_end = BITMAP_SIZE(0xc0000);
716 memset(bitmap, 0xFF, brsize + extra_bytes);
717 memset(bitmap + hole_start, 0, hole_end - hole_start);
718 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
720 return 0;
#ifdef KVM_CAP_IRQCHIP

/* Drive one interrupt line of the in-kernel irqchip to `level`. */
int kvm_set_irq(int irq, int level)
{
    return kvm_set_irq_level(kvm_context, irq, level);
}

#endif
/* Called before an aio wait; body not visible in this excerpt —
 * presumably empty, TODO confirm against the full source. */
732 void qemu_kvm_aio_wait_start(void)
736 void qemu_kvm_aio_wait(void)
738 if (!cpu_single_env || cpu_single_env->cpu_index == 0) {
739 pthread_mutex_unlock(&qemu_mutex);
740 kvm_eat_signal(cpu_single_env, 1000);
741 pthread_mutex_lock(&qemu_mutex);
742 } else {
743 pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
/* Called after an aio wait completes; body not visible in this excerpt —
 * presumably empty, TODO confirm against the full source. */
747 void qemu_kvm_aio_wait_end(void)
/* Closes the file-wide `#ifdef USE_KVM` opened near the top. */
751 #endif