Separate thread for IO handling
[qemu-kvm/fedora.git] / qemu-kvm.c
/*
 * qemu/kvm integration
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 *
 * Licensed under the terms of the GNU GPL version 2 or higher.
 */
#include "config.h"
#include "config-host.h"

int kvm_allowed = 1;
int kvm_irqchip = 1;
int kvm_pit = 1;
#include <string.h>
#include <errno.h>    /* errno is tested in kvm_eat_signal() */
#include "hw/hw.h"
#include "sysemu.h"

#include "qemu-kvm.h"
#include <libkvm.h>
#include <pthread.h>
#include <sys/utsname.h>
#include <sys/syscall.h>
extern void perror(const char *s);

kvm_context_t kvm_context;

extern int smp_cpus;

static int qemu_kvm_reset_requested;

pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
__thread struct vcpu_info *vcpu;
struct qemu_kvm_signal_table {
    sigset_t sigset;
    sigset_t negsigset;
};

static struct qemu_kvm_signal_table io_signal_table;
static struct qemu_kvm_signal_table vcpu_signal_table;
#define SIG_IPI (SIGRTMIN+4)
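/*
 * SIG_IPI is a POSIX real-time signal used purely as a cross-thread
 * "kick": delivering it forces a vcpu thread out of KVM_RUN (or out of
 * sigtimedwait) with EINTR so the thread re-examines its state.  The +4
 * offset appears to be an arbitrary choice to stay clear of other users
 * of the low real-time signals.
 */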
struct vcpu_info {
    CPUState *env;
    int sipi_needed;
    int init;
    pthread_t thread;
    int signalled;
    int stop;
    int stopped;
} vcpu_info[256];

pthread_t io_thread;
static inline unsigned long kvm_get_thread_id(void)
{
    return syscall(SYS_gettid);
}

CPUState *qemu_kvm_cpu_env(int index)
{
    return vcpu_info[index].env;
}
static void sig_ipi_handler(int n)
{
    /* empty on purpose: SIG_IPI only needs to interrupt a syscall */
}
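/*
 * Called when code running in one thread raises an interrupt for a vcpu
 * owned by another thread.  The "signalled" flag debounces the IPI: a
 * vcpu is kicked at most once until it acknowledges the signal in
 * kvm_main_loop_wait().
 */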
void kvm_update_interrupt_request(CPUState *env)
{
    int signal = 0;

    if (env) {
        if (!vcpu)
            signal = 1;
        if (vcpu && env != vcpu->env && !vcpu_info[env->cpu_index].signalled)
            signal = 1;

        if (signal) {
            vcpu_info[env->cpu_index].signalled = 1;
            if (vcpu_info[env->cpu_index].thread)
                pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);
        }
    }
}
void kvm_update_after_sipi(CPUState *env)
{
    vcpu_info[env->cpu_index].sipi_needed = 1;
    kvm_update_interrupt_request(env);
}

void kvm_apic_init(CPUState *env)
{
    if (env->cpu_index != 0)
        vcpu_info[env->cpu_index].init = 1;
    kvm_update_interrupt_request(env);
}
#include <signal.h>

static int try_push_interrupts(void *opaque)
{
    return kvm_arch_try_push_interrupts(opaque);
}
static void post_kvm_run(void *opaque, int vcpu)
{
    pthread_mutex_lock(&qemu_mutex);
    kvm_arch_post_kvm_run(opaque, vcpu);
}

static int pre_kvm_run(void *opaque, int vcpu)
{
    CPUState *env = qemu_kvm_cpu_env(vcpu);

    kvm_arch_pre_kvm_run(opaque, vcpu);

    if (env->interrupt_request & CPU_INTERRUPT_EXIT)
        return 1;
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}
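/*
 * Locking protocol around KVM_RUN: pre_kvm_run() drops qemu_mutex just
 * before the thread enters the kernel and post_kvm_run() re-takes it on
 * the way out, so the global mutex is never held while a vcpu executes
 * guest code.
 */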
void kvm_load_registers(CPUState *env)
{
    if (kvm_enabled())
        kvm_arch_load_regs(env);
}

void kvm_save_registers(CPUState *env)
{
    if (kvm_enabled())
        kvm_arch_save_regs(env);
}
int kvm_cpu_exec(CPUState *env)
{
    int r;

    r = kvm_run(kvm_context, env->cpu_index);
    if (r < 0) {
        printf("kvm_run returned %d\n", r);
        exit(1);
    }

    return 0;
}
extern int vm_running;

static int has_work(CPUState *env)
{
    if (!vm_running || (env && vcpu_info[env->cpu_index].stopped))
        return 0;
    if (!(env->hflags & HF_HALTED_MASK))
        return 1;
    return kvm_arch_has_work(env);
}
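/*
 * Both the io thread and the vcpu threads sleep in sigtimedwait() on
 * their respective signal sets: a pending signal is consumed and its
 * handler dispatched by hand below, and a timeout of 0 turns the call
 * into a simple poll.  Roughly:
 *
 *     if (sigtimedwait(&waitset->sigset, &siginfo, &ts) != -1)
 *         dispatch(siginfo.si_signo);
 *
 * (a sketch only; the real loop with locking and errno handling follows).
 */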
static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
                          int timeout)
{
    struct timespec ts;
    int r, e, ret = 0;
    siginfo_t siginfo;
    struct sigaction sa;

    ts.tv_sec = timeout / 1000;
    ts.tv_nsec = (timeout % 1000) * 1000000;
    r = sigtimedwait(&waitset->sigset, &siginfo, &ts);
    if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
        return 0;
    e = errno;
    pthread_mutex_lock(&qemu_mutex);
    if (env && vcpu)
        cpu_single_env = vcpu->env;
    if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
        printf("sigtimedwait: %s\n", strerror(e));
        exit(1);
    }
    if (r != -1) {
        sigaction(siginfo.si_signo, NULL, &sa);
        sa.sa_handler(siginfo.si_signo);
        if (siginfo.si_signo == SIGUSR2)
            pthread_cond_signal(&qemu_aio_cond);
        ret = 1;
    }
    if (env && vcpu_info[env->cpu_index].stop) {
        vcpu_info[env->cpu_index].stop = 0;
        vcpu_info[env->cpu_index].stopped = 1;
        pthread_kill(io_thread, SIGUSR1);
    }
    pthread_mutex_unlock(&qemu_mutex);

    return ret;
}
static void kvm_eat_signals(CPUState *env, int timeout)
{
    int r = 0;
    struct qemu_kvm_signal_table *waitset = &vcpu_signal_table;

    while (kvm_eat_signal(waitset, env, 0))
        r = 1;
    if (!r && timeout) {
        r = kvm_eat_signal(waitset, env, timeout);
        if (r)
            while (kvm_eat_signal(waitset, env, 0))
                ;
    }
}
static void kvm_main_loop_wait(CPUState *env, int timeout)
{
    pthread_mutex_unlock(&qemu_mutex);
    kvm_eat_signals(env, timeout);
    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = env;
    vcpu_info[env->cpu_index].signalled = 0;
}
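/*
 * vcpu pause handshake: the io thread sets ->stop and sends SIG_IPI; the
 * vcpu thread notices ->stop in kvm_eat_signal(), turns it into ->stopped
 * and answers the io thread with SIGUSR1.  all_threads_paused() below
 * merely polls the ->stopped flags.
 */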
static int all_threads_paused(void)
{
    int i;

    for (i = 0; i < smp_cpus; ++i)
        if (!vcpu_info[i].stopped)    /* this vcpu has not parked yet */
            return 0;
    return 1;
}
static void pause_all_threads(void)
{
    int i;

    for (i = 0; i < smp_cpus; ++i) {
        vcpu_info[i].stop = 1;
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
    }
    while (!all_threads_paused())
        kvm_eat_signal(&io_signal_table, NULL, 1000);
}
static void resume_all_threads(void)
{
    int i;

    for (i = 0; i < smp_cpus; ++i) {
        vcpu_info[i].stop = 0;
        vcpu_info[i].stopped = 0;
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
    }
}

static void kvm_vm_state_change_handler(void *context, int running)
{
    if (running)
        resume_all_threads();
    else
        pause_all_threads();
}
static void update_regs_for_sipi(CPUState *env)
{
    kvm_arch_update_regs_for_sipi(env);
    vcpu_info[env->cpu_index].sipi_needed = 0;
    vcpu_info[env->cpu_index].init = 0;
}

static void update_regs_for_init(CPUState *env)
{
    cpu_reset(env);
    kvm_arch_load_regs(env);
}
static void setup_kernel_sigmask(CPUState *env)
{
    sigset_t set;

    sigprocmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
}
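/*
 * Per-vcpu main loop: wait until there is work (an interrupt pending or
 * the cpu not halted), fold in SIPI/INIT state when the irqchip is in
 * userspace, run the guest, then drain signals that arrived meanwhile.
 * Only vcpu 0 services system reset requests.
 */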
static int kvm_main_loop_cpu(CPUState *env)
{
    struct vcpu_info *info = &vcpu_info[env->cpu_index];

    setup_kernel_sigmask(env);
    pthread_mutex_lock(&qemu_mutex);

    kvm_qemu_init_env(env);
    env->ready_for_interrupt_injection = 1;
#ifdef TARGET_I386
    kvm_tpr_vcpu_start(env);
#endif

    cpu_single_env = env;
    while (1) {
        while (!has_work(env))
            kvm_main_loop_wait(env, 10);
        if (env->interrupt_request & CPU_INTERRUPT_HARD)
            env->hflags &= ~HF_HALTED_MASK;
        if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
            update_regs_for_sipi(env);
        if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
            update_regs_for_init(env);
        if (!(env->hflags & HF_HALTED_MASK) && !info->init)
            kvm_cpu_exec(env);
        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
        kvm_main_loop_wait(env, 0);
        if (qemu_kvm_reset_requested && env->cpu_index == 0) {
            qemu_kvm_reset_requested = 0;
            env->interrupt_request = 0;
            qemu_system_reset();
            kvm_arch_load_regs(env);
        }
    }
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}
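/*
 * Entry point of each vcpu thread.  Every signal except SIG_IPI is
 * blocked here, so machine-event signals are routed to the io thread;
 * SIG_IPI stays unblocked and, having only an empty handler, serves
 * purely to interrupt KVM_RUN or sigtimedwait() with EINTR.
 */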
static void *ap_main_loop(void *_env)
{
    CPUState *env = _env;
    sigset_t signals;

    vcpu = &vcpu_info[env->cpu_index];
    vcpu->env = env;
    vcpu->env->thread_id = kvm_get_thread_id();
    sigfillset(&signals);
    sigdelset(&signals, SIG_IPI);
    sigprocmask(SIG_BLOCK, &signals, NULL);
    kvm_create_vcpu(kvm_context, env->cpu_index);
    kvm_qemu_init_env(env);
    if (kvm_irqchip_in_kernel(kvm_context))
        env->hflags &= ~HF_HALTED_MASK;
    kvm_main_loop_cpu(env);
    return NULL;
}
static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
{
    sigemptyset(&sigtab->sigset);
    sigfillset(&sigtab->negsigset);
}

static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
{
    sigaddset(&sigtab->sigset, signum);
    sigdelset(&sigtab->negsigset, signum);
}

void kvm_init_new_ap(int cpu, CPUState *env)
{
    pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env);
}
static void qemu_kvm_init_signal_tables(void)
{
    qemu_kvm_init_signal_table(&io_signal_table);
    qemu_kvm_init_signal_table(&vcpu_signal_table);

    kvm_add_signal(&io_signal_table, SIGIO);
    kvm_add_signal(&io_signal_table, SIGALRM);
    kvm_add_signal(&io_signal_table, SIGUSR1);
    kvm_add_signal(&io_signal_table, SIGUSR2);

    kvm_add_signal(&vcpu_signal_table, SIG_IPI);

    sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);
}
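/*
 * Signal routing: SIGIO (fd events), SIGALRM (timers) and SIGUSR1/SIGUSR2
 * (pause acknowledge and aio completion) all land in the io thread's wait
 * set; vcpu threads wait only on SIG_IPI.  Blocking io_signal_table here,
 * before any vcpu thread is created, makes the children inherit the mask.
 */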
int kvm_init_ap(void)
{
    CPUState *env = first_cpu;
    int i;

#ifdef TARGET_I386
    kvm_tpr_opt_setup();
#endif
    qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
    qemu_kvm_init_signal_tables();

    signal(SIG_IPI, sig_ipi_handler);
    for (i = 0; i < smp_cpus; ++i) {
        kvm_init_new_ap(i, env);
        env = env->next_cpu;
    }
    return 0;
}
/*
 * The IO thread has all signals that inform machine events
 * blocked (io_signal_table), so it won't get interrupted
 * while processing in main_loop_wait().
 */
int kvm_main_loop(void)
{
    io_thread = pthread_self();
    pthread_mutex_unlock(&qemu_mutex);
    while (1) {
        kvm_eat_signal(&io_signal_table, NULL, 1000);
        pthread_mutex_lock(&qemu_mutex);
        cpu_single_env = NULL;
        main_loop_wait(0);
        if (qemu_shutdown_requested())
            break;
        else if (qemu_powerdown_requested())
            qemu_system_powerdown();
        else if (qemu_reset_requested()) {
            pthread_kill(vcpu_info[0].thread, SIG_IPI);
            qemu_kvm_reset_requested = 1;
        }
        pthread_mutex_unlock(&qemu_mutex);
    }

    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}
static int kvm_debug(void *opaque, int vcpu)
{
    CPUState *env = cpu_single_env;

    env->exception_index = EXCP_DEBUG;
    return 1;
}
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    *data = cpu_inb(0, addr);
    return 0;
}

static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    *data = cpu_inw(0, addr);
    return 0;
}

static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    *data = cpu_inl(0, addr);
    return 0;
}
#define PM_IO_BASE 0xb000
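/*
 * Port 0xb2 is the APM/SMI command port on PC chipsets.  Writes of 0xf0
 * and 0xf1 mimic what a PIIX4 SMI handler would do: toggle the SCI_EN
 * bit (bit 0) of PMCNTRL at PM_IO_BASE + 4, i.e. switch ACPI off and on.
 * A write of 0 is acknowledged on the companion status port 0xb3.
 */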
static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    if (addr == 0xb2) {
        switch (data) {
        case 0: {
            cpu_outb(0, 0xb3, 0);
            break;
        }
        case 0xf0: {
            unsigned x;

            /* disable acpi */
            x = cpu_inw(0, PM_IO_BASE + 4);
            x &= ~1;
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        case 0xf1: {
            unsigned x;

            /* enable acpi */
            x = cpu_inw(0, PM_IO_BASE + 4);
            x |= 1;
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        default:
            break;
        }
        return 0;
    }
    cpu_outb(0, addr, data);
    return 0;
}
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);
    return 0;
}

static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);
    return 0;
}

static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 0);
    return 0;
}

static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 1);
    return 0;
}

static int kvm_io_window(void *opaque)
{
    return 1;
}
static int kvm_halt(void *opaque, int vcpu)
{
    return kvm_arch_halt(opaque, vcpu);
}

static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();
    return 1;
}
static struct kvm_callbacks qemu_kvm_ops = {
    .debug = kvm_debug,
    .inb = kvm_inb,
    .inw = kvm_inw,
    .inl = kvm_inl,
    .outb = kvm_outb,
    .outw = kvm_outw,
    .outl = kvm_outl,
    .mmio_read = kvm_mmio_read,
    .mmio_write = kvm_mmio_write,
    .halt = kvm_halt,
    .shutdown = kvm_shutdown,
    .io_window = kvm_io_window,
    .try_push_interrupts = try_push_interrupts,
    .post_kvm_run = post_kvm_run,
    .pre_kvm_run = pre_kvm_run,
#ifdef TARGET_I386
    .tpr_access = handle_tpr_access,
#endif
#ifdef TARGET_PPC
    .powerpc_dcr_read = handle_powerpc_dcr_read,
    .powerpc_dcr_write = handle_powerpc_dcr_write,
#endif
};
int kvm_qemu_init(void)
{
    /* Try to initialize kvm */
    kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
    if (!kvm_context) {
        return -1;
    }
    pthread_mutex_lock(&qemu_mutex);

    return 0;
}
int kvm_qemu_create_context(void)
{
    int r;

    if (!kvm_irqchip) {
        kvm_disable_irqchip_creation(kvm_context);
    }
    if (!kvm_pit) {
        kvm_disable_pit_creation(kvm_context);
    }
    if (kvm_create(kvm_context, phys_ram_size, (void **)&phys_ram_base) < 0) {
        kvm_qemu_destroy();
        return -1;
    }
    r = kvm_arch_qemu_create_context();
    if (r < 0) {
        kvm_qemu_destroy();
        return -1;    /* propagate the arch failure instead of reporting success */
    }
    return 0;
}
void kvm_qemu_destroy(void)
{
    kvm_finalize(kvm_context);
}
void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
                                      unsigned long size,
                                      unsigned long phys_offset)
{
#ifdef KVM_CAP_USER_MEMORY
    int r = 0;

    r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
    if (r) {
        if (!(phys_offset & ~TARGET_PAGE_MASK)) {
            r = kvm_is_allocated_mem(kvm_context, start_addr, size);
            if (r)
                return;
            r = kvm_is_intersecting_mem(kvm_context, start_addr);
            if (r)
                kvm_create_mem_hole(kvm_context, start_addr, size);
            r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
                                                phys_ram_base + phys_offset,
                                                size, 0);
        }
        if (phys_offset & IO_MEM_ROM) {
            phys_offset &= ~IO_MEM_ROM;
            r = kvm_is_intersecting_mem(kvm_context, start_addr);
            if (r)
                kvm_create_mem_hole(kvm_context, start_addr, size);
            r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
                                                phys_ram_base + phys_offset,
                                                size, 0);
        }
        if (r < 0) {
            printf("kvm_cpu_register_physical_memory: failed\n");
            exit(1);
        }
        return;
    }
#endif
    if (phys_offset & IO_MEM_ROM) {
        phys_offset &= ~IO_MEM_ROM;
        memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
    }
}
int kvm_qemu_check_extension(int ext)
{
    return kvm_check_extension(kvm_context, ext);
}

int kvm_qemu_init_env(CPUState *cenv)
{
    return kvm_arch_qemu_init_env(cenv);
}
int kvm_update_debugger(CPUState *env)
{
    struct kvm_debug_guest dbg;
    int i;

    dbg.enabled = 0;
    if (env->nb_breakpoints || env->singlestep_enabled) {
        dbg.enabled = 1;
        for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
            dbg.breakpoints[i].enabled = 1;
            dbg.breakpoints[i].address = env->breakpoints[i];
        }
        dbg.singlestep = env->singlestep_enabled;
    }
    return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
}
/*
 * dirty pages logging
 */
/* FIXME: use unsigned long pointer instead of unsigned char */
unsigned char *kvm_dirty_bitmap = NULL;
int kvm_physical_memory_set_dirty_tracking(int enable)
{
    int r = 0;

    if (!kvm_enabled())
        return 0;

    if (enable) {
        if (!kvm_dirty_bitmap) {
            unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
            kvm_dirty_bitmap = qemu_malloc(bitmap_size);
            if (kvm_dirty_bitmap == NULL) {
                perror("Failed to allocate dirty pages bitmap");
                r = -1;
            }
            else {
                r = kvm_dirty_pages_log_enable_all(kvm_context);
            }
        }
    }
    else {
        if (kvm_dirty_bitmap) {
            r = kvm_dirty_pages_log_reset(kvm_context);
            qemu_free(kvm_dirty_bitmap);
            kvm_dirty_bitmap = NULL;
        }
    }
    return r;
}
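/*
 * Each byte of the kvm dirty bitmap covers eight guest pages, lsb first:
 * bit j of bitmap[i] set means page (i * 8 + j) is dirty, i.e. guest
 * physical address (i * 8 + j) * TARGET_PAGE_SIZE.  With 4K pages, for
 * example, bitmap[2] == 0x05 marks pages 16 and 18 (addresses 0x10000
 * and 0x12000) dirty.
 */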
/* get kvm's dirty pages bitmap and update qemu's */
int kvm_get_dirty_pages_log_range(unsigned long start_addr,
                                  unsigned char *bitmap,
                                  unsigned int offset,
                                  unsigned long mem_size)
{
    unsigned int i, j, n = 0;
    unsigned char c;
    unsigned page_number, addr, addr1;
    unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + 7) / 8;

    /*
     * bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty.
     */
    for (i = 0; i < len; i++) {
        c = bitmap[i];
        while (c > 0) {
            j = ffsl(c) - 1;
            c &= ~(1u << j);
            page_number = i * 8 + j;
            addr1 = page_number * TARGET_PAGE_SIZE;
            addr = offset + addr1;
            cpu_physical_memory_set_dirty(addr);
            n++;
        }
    }
    return 0;
}
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}

/*
 * get kvm's dirty pages bitmap and update qemu's
 * we only care about physical ram, which resides in slots 0 and 3
 */
int kvm_update_dirty_pages_log(void)
{
    int r = 0;

    r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
                                  kvm_dirty_bitmap, NULL,
                                  kvm_get_dirty_bitmap_cb);
    return r;
}
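/*
 * Mark which phys_ram pages hold guest RAM: all of ram_size plus the
 * extra pages qemu allocates beyond it (VGA and BIOS images), except the
 * legacy PC hole at 0xa0000-0xc0000, where the VGA window sits instead
 * of RAM.
 */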
int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
{
    unsigned int bsize = BITMAP_SIZE(phys_ram_size);
    unsigned int brsize = BITMAP_SIZE(ram_size);
    unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
    unsigned int extra_bytes = (extra_pages + 7) / 8;
    unsigned int hole_start = BITMAP_SIZE(0xa0000);
    unsigned int hole_end = BITMAP_SIZE(0xc0000);

    memset(bitmap, 0xFF, brsize + extra_bytes);
    memset(bitmap + hole_start, 0, hole_end - hole_start);
    memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);

    return 0;
}
#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq(int irq, int level)
{
    return kvm_set_irq_level(kvm_context, irq, level);
}

#endif
void qemu_kvm_aio_wait_start(void)
{
}
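/*
 * Wait for aio completion.  Called from the io thread itself
 * (cpu_single_env == NULL) it keeps eating io signals so completions can
 * be processed; a vcpu thread instead sleeps on qemu_aio_cond, which
 * kvm_eat_signal() signals when SIGUSR2 (aio completion) arrives.
 */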
void qemu_kvm_aio_wait(void)
{
    CPUState *cpu_single = cpu_single_env;

    if (!cpu_single_env) {
        pthread_mutex_unlock(&qemu_mutex);
        kvm_eat_signal(&io_signal_table, NULL, 1000);
        pthread_mutex_lock(&qemu_mutex);
        cpu_single_env = NULL;
    } else {
        pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
        cpu_single_env = cpu_single;
    }
}

void qemu_kvm_aio_wait_end(void)
{
}
int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
{
    return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
}

void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
                              unsigned long size, int log, int writable)
{
    return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
}

void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
                              unsigned long size)
{
    kvm_destroy_phys_mem(kvm_context, start_addr, size);
}